Overview

In this assessment we aim to use the MACCDC conn data to perform data analysis and modelling. First we’ll import any libraries we intend to use.

#install.packages("dbscan")
#install.packages("cluster")
#install.packages("reshape")
#install.packages("ggplot2")
#install.packages("gridExtra")
#install.packages("Matrix")
#install.packages("irlba")
#install.packages("Rtsne")
#install.packages("umap")
#install.packages("uwot")
#install.packages("imager")
library(dbscan)
library(cluster)
library(reshape)
library(ggplot2)
library(gridExtra)
library(Matrix)
library(irlba)
library(Rtsne)
library(umap)
library(uwot)
library(imager)

We first must import the data.

# Read the sampled connection log from disk into a data frame and print it.
mydata <- data.frame(read.csv("MAC.csv"))
mydata

We first want to look for missing data. Service, duration, orig_bytes, resp_bytes and local_orig all seem to have missing data in them so we will see what percentage.

# Cross-tabulate "duration is NA" against protocol, then rescale every
# protocol column so it sums to 1 (share of missing / present rows).
mtab0 <- table(
  missingduration = is.na(mydata[["duration"]]),
  proto = mydata[["proto"]]
)
apply(mtab0, 2, function(counts) counts / sum(counts))
               proto
missingduration      icmp       tcp       udp
          FALSE 0.8585338 0.1656118 0.3089144
          TRUE  0.1414662 0.8343882 0.6910856
# Cross-tabulate "orig_bytes is NA" against protocol, then rescale every
# protocol column so it sums to 1 (share of missing / present rows).
mtab1 <- table(
  missing_orig_bytes = is.na(mydata[["orig_bytes"]]),
  proto = mydata[["proto"]]
)
apply(mtab1, 2, function(counts) counts / sum(counts))
                  proto
missing_orig_bytes      icmp       tcp       udp
             FALSE 0.8585338 0.1656118 0.3089144
             TRUE  0.1414662 0.8343882 0.6910856
# Cross-tabulate "resp_bytes is NA" against protocol, then rescale every
# protocol column so it sums to 1 (share of missing / present rows).
mtab2 <- table(
  missing_resp_bytes = is.na(mydata[["resp_bytes"]]),
  proto = mydata[["proto"]]
)
apply(mtab2, 2, function(counts) counts / sum(counts))
                  proto
missing_resp_bytes      icmp       tcp       udp
             FALSE 0.8585338 0.1656118 0.3089144
             TRUE  0.1414662 0.8343882 0.6910856
# Cross-tabulate "local_orig is NA" against protocol, then rescale every
# protocol column so it sums to 1 (share of missing / present rows).
mtab3 <- table(
  missing_local_orig = is.na(mydata[["local_orig"]]),
  proto = mydata[["proto"]]
)
apply(mtab3, 2, function(counts) counts / sum(counts))
icmp  tcp  udp 
   1    1    1 

Thus we are missing the local_orig feature for every data point in the data set. We may then consider dropping this entire column as it serves no use to us and we cannot impute the data without prior knowledge of the data set and what it should look like. The duration, orig_bytes and resp_bytes all appear to be missing exactly the same data - on further analysis, we see that whenever one is missing, all three are missing.

Some initial data cleansing will come from removing the X column and the ts column. The X column is produced by the sampling and since we have a random sample of the data, the ts provides no real information on the data.

# Check whether any uid occurs more than once. The guard keeps this chunk
# re-runnable: once the uid column has been dropped from mydata, indexing it
# raises "undefined columns selected" instead of answering the question.
if ("uid" %in% names(mydata)) {
  unique_uid <- mydata[!duplicated(mydata[["uid"]]), ]
  # 0 here means every uid is unique.
  message(nrow(mydata) - nrow(unique_uid), " duplicated uid value(s) found")
} else {
  message("Column 'uid' is not present in mydata; skipping the uniqueness check")
}
Error in `[.data.frame`(mydata, , c("uid")) : undefined columns selected

Thus all our uids are unique and therefore won’t provide us with any extra information either, since they will be uncorrelated with the rest of the data. This is the only column with this trait; all other columns have values which occur more than once, so we can drop the uid column too.

# Remove the columns that carry no usable information, then preview the rest.
drop_columns <- c("X", "ts", "local_orig", "uid")
mydata <- mydata[, is.na(match(names(mydata), drop_columns))]
head(mydata)

So we have removed the columns that didn’t provide us with any extra information. We will now extract the data we will use for DBSCAN to create clusters. The following code is pulled from Alex’s workbook and allows us to pull out 7 of the features to use for DBSCAN and ensures all elements are numeric.

# miss.me <- vector(length = nrow(mydata))
# miss.me <- rep(0, times = nrow(mydata))
# for(i in 1:nrow(mydata)) {
#   if(is.na(mydata$duration[i])) { miss.me[i] <- 1 }
#   }
# str(mydata)
# mydata.good <- as.data.frame(cbind(id.orig_p = mydata$id.orig_p, id.resp_p = mydata$id.resp_p, 
# orig_pkts = mydata$orig_pkts, orig_ip_bytes = mydata$orig_ip_bytes, 
# resp_pkts = mydata$resp_pkts, resp_ip_bytes = mydata$resp_ip_bytes))
# mydata.good<- cbind(mydata.good, miss.me)
# head(mydata.good)
# str(mydata.good) # Should be only ints and nums
# 
# for(i in 1:ncol(mydata.good)) { mydata.good[,i] <- as.numeric(mydata.good[,i]) }
# str(mydata.good)      ## All should be nums now
# # sum(mydata.good$miss.me)/nrow(mydata.good) ## 82.7% missing

The data cleansing Alex performed wasn’t very conducive to allowing me to impute data so I will use the basis of his but make some small changes.

# Collect the six port / packet / byte-count columns into their own data frame.
feature_cols <- c(
  "id.orig_p", "id.resp_p",
  "orig_pkts", "orig_ip_bytes",
  "resp_pkts", "resp_ip_bytes"
)
mydata.good <- as.data.frame(do.call(cbind, as.list(mydata[feature_cols])))

mydata.good

I don’t want to drop any data that may be important, so I’ll also use the protocol, connection state and history features in my analysis.

# Turn a categorical column into its integer level codes.
encode_levels <- function(values) {
  unclass(as.factor(c(values)))
}

proto <- encode_levels(mydata$proto)
conn_state <- encode_levels(mydata$conn_state)
history <- encode_levels(mydata$history)

# Append the encoded columns to the feature table.
mydata.good$proto <- proto
mydata.good$conn_state <- conn_state
mydata.good$history <- history

# Coerce every column to plain numeric (this also strips the level attributes).
mydata.good[] <- lapply(mydata.good, as.numeric)

mydata.good
# Keep the three columns that contain NAs in a separate data frame so they can
# be imputed later without taking part in the clustering.
missing_cols <- c("duration", "orig_bytes", "resp_bytes")
data_missing <- as.data.frame(do.call(cbind, as.list(mydata[missing_cols])))

data_missing

The commented-out code below is Alex’s method for 10-fold CV. Since we randomly sampled the initial data set, taking the top 90% of the data frame we now have is still taking a random subset, so randomising the rows pulled for the training/testing data sets won’t change the results. Splitting the data this way makes the later mean imputation much simpler.

#   ## We'll do 10-fold CV and then apply DBSCAN, training on 90%
# dg <- mydata.good
# ran <- sample(1:nrow(dg), 0.9 * nrow(dg))
# nor <-function(x) { (x -min(x))/(max(x)-min(x))   }
# dg_norm <- as.data.frame(lapply(dg, nor))
#   # head(dg_norm)
# 
# dg_train <- dg_norm[ran,]     ## extract training set
# dg_test <- dg_norm[-ran,]     ## extract testing set
# dg_target_cat <- dg[ran, ncol(dg)]
# dg_test_cat <- dg[-ran, ncol(dg)]
# Split the rows into a leading 90% training part and a trailing test part.
# The test rows are defined as "everything after the training rows". Taking
# tail(..., 0.1 * n) instead rounds a fractional length up, which can put the
# boundary row into both sets.
n_total <- nrow(mydata.good)
n_train <- round(0.9 * n_total)
train_idx <- seq_len(n_train)
test_idx <- tail(seq_len(n_total), n_total - n_train)

dg_train <- mydata.good[train_idx, ]
dg_test <- mydata.good[test_idx, ]

# Same row split for the columns that contain missing values.
dg_train_missing <- data_missing[train_idx, ]
dg_test_missing <- data_missing[test_idx, ]

# Min-max rescale a numeric vector to [0, 1].
# A constant vector has zero range; it is mapped to all zeros rather than NaN
# so that downstream svd() does not fail on non-finite input.
nor <- function(x) {
  span <- max(x) - min(x)
  if (span == 0) {
    return(rep(0, length(x)))
  }
  (x - min(x)) / span
}

# Each split is rescaled with its own column minima and maxima.
dg_train <- as.data.frame(lapply(dg_train, nor))
dg_test <- as.data.frame(lapply(dg_test, nor))

SVD

Now we can look at running DBSCAN on our data. We first need to perform PCA to figure out how many principal components to use in DBSCAN.

# Decompose the normalised training features and plot the values in $d twice:
# first on a logarithmic y axis, then on a linear one.
dg_train.svd <- svd(dg_train)
for (axis_scale in c("y", "")) {
  plot(
    dg_train.svd$d,
    xlab = "Eigenvalue index",
    ylab = "Eigenvalue",
    log = axis_scale
  )
}

Plotting with the different axis gives a striking difference. I’ll follow the similar path of using the log axis and thus using 5 principal components since this is where the elbow occurs.

# Number of leading SVD components kept for the later steps.
npcs <- 5

We now plot the PCA to visualise the clusters formed here. We’re not plotting according to any categorical data i.e. normal vs non-normal so we may not get that much information from this.

# Scatter plot of two columns of the left singular vectors (semi-transparent
# points so dense regions show up darker).
i <- 1
j <- 2
plot(
  dg_train.svd$u[, i],
  dg_train.svd$u[, j],
  type = "p",
  col = "#33333311",
  pch = 16,
  cex = 1
)

As a reflection, all the code in this document was initially run on the same data but with the miss.me column from Alex’s code above, which creates a drastic difference in the output of svd. It results in us needing an extra principal component and removes the parallelograms from the plot above - therefore I would assume that ‘missingness’ has an effect on the clustering, i.e. which cluster a data point is placed into depends on whether it has missing values. Since we are trying to impute the missing data I’m going to use complete case analysis and perform clustering without reference to any missingness.

Finding Parameters for DBSCAN

Eps specifies how close the points should be to each other to form a cluster. If the distance is less than eps, they are considered neighbours. We find this number by finding the ‘knee’ in the plot below. I have chosen to use 10 (dim+1) neighbours here.

# Distances from every point to each of its 10 nearest neighbours in the
# reduced space; the smallest one per row is kept.
test <- kNNdist(dg_train.svd$u[, 1:npcs], k = 10, all = TRUE)
testmin <- apply(test, 1, min)

# Sorted non-zero distances on a log axis, with candidate eps values marked.
plot(sort(testmin[testmin > 1e-8]), log = "y")
threshholds <- c(0.01, 0.001, 0.0001, 0.00001, 0.000001)
abline(h = threshholds)
# The value chosen as eps is highlighted in red.
abline(h = 0.0001, col = "red")

So we choose h=0.0001 as our limit since this allows us to capture most of the information here. We also need to define our minimum number of points to form a cluster. The recommendation is to use minPts = 2*dim for large data sets to ensure we find significant clusters but we’ll look at a range to see what outputs we could get. As a reference, Alex is using 15 clusters so we’ll aim to reduce our data set down to that many but this is dependent on how that clustering looks and performs for mean imputation.

DBSCAN

Now we finally perform DBSCAN.

# Run DBSCAN once per candidate minPts and record how many distinct labels
# each run produces. The count includes label 0 (noise) whenever it occurs.
#
# The result has one entry per candidate, named by its minPts value. Indexing
# the result by the minPts value itself would give a 400-long vector that is
# almost entirely NA.
minPts <- c(20, 25, 30, 35, 40, 45, 50, 75, 100, 125, 150, 175, 200, 225, 250, 300, 400)

clustercounts <- vapply(
  minPts,
  function(val) {
    dbscanres <- dbscan(dg_train.svd$u[, 1:npcs], eps = 0.0001, minPts = val)
    length(unique(dbscanres$cluster))
  },
  integer(1)
)
names(clustercounts) <- minPts
clustercounts
  [1]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA 180  NA
 [22]  NA  NA  NA  72  NA  NA  NA  NA  75  NA  NA  NA  NA  93  NA  NA  NA  NA 110  NA  NA
 [43]  NA  NA 100  NA  NA  NA  NA  99  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
 [64]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  72  NA  NA  NA  NA  NA  NA  NA  NA  NA
 [85]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  47  NA  NA  NA  NA  NA
[106]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  38  NA
[127]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
[148]  NA  NA  39  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
[169]  NA  NA  NA  NA  NA  NA  32  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
[190]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  17  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
[211]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  15  NA  NA  NA  NA  NA  NA
[232]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  20  NA  NA
[253]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
[274]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
[295]  NA  NA  NA  NA  NA  24  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
[316]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
[337]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
[358]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
[379]  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
[400]  13

The number of clusters we obtain stabilises somewhere around 200 min points, since we get inflections around this point. We’ll visualise several of them to see what they look like and give a comparison. To create similarity between this and Alex’s clustering I may use 200 min points, but we’ll reflect on this after the visualisations.

# Fit DBSCAN on the reduced training data for five minPts settings, all with
# the same eps.
reduced_train <- dg_train.svd$u[, 1:npcs]
dbscan400 <- dbscan(reduced_train, eps = 0.0001, minPts = 400)
dbscan200 <- dbscan(reduced_train, eps = 0.0001, minPts = 200)
dbscan175 <- dbscan(reduced_train, eps = 0.0001, minPts = 175)
dbscan50 <- dbscan(reduced_train, eps = 0.0001, minPts = 50)
dbscan30 <- dbscan(reduced_train, eps = 0.0001, minPts = 30)
# A silhouette score was attempted to validate the clustering, but the call
# below fails with "Error: Vector memory exhausted (limit reached?)", so it is
# left disabled.
#ss <- silhouette(dbscan200$cluster, dist(dg_train.svd$u))

Plotting resulting clusters

# Write a 2 x 5 grid of scatter plots, one per pair of the first five SVD
# components, coloured by the minPts = 400 labels (label 0 is drawn in grey).
png(file = "DBSCAN400 plots.png")
op <- par(mfrow = c(2, 5))
point_colours <- c("#66666666", rainbow(41))[dbscan400$cluster + 1]
component_pairs <- combn(5, 2)
for (pair in seq_len(ncol(component_pairs))) {
  plot(
    dg_train.svd$u[, component_pairs[1, pair]],
    dg_train.svd$u[, component_pairs[2, pair]],
    xlab = "", ylab = "",
    col = point_colours, pch = 19, cex = 0.5
  )
}
par(op)
dev.off()
null device 
          1 
# Write a 2 x 5 grid of scatter plots, one per pair of the first five SVD
# components, coloured by the minPts = 200 labels (label 0 is drawn in grey).
png(file = "DBSCAN200 plots.png")
op <- par(mfrow = c(2, 5))
point_colours <- c("#66666666", rainbow(41))[dbscan200$cluster + 1]
component_pairs <- combn(5, 2)
for (pair in seq_len(ncol(component_pairs))) {
  plot(
    dg_train.svd$u[, component_pairs[1, pair]],
    dg_train.svd$u[, component_pairs[2, pair]],
    xlab = "", ylab = "",
    col = point_colours, pch = 19, cex = 0.5
  )
}
par(op)
dev.off()
null device 
          1 
# Write a 2 x 5 grid of scatter plots, one per pair of the first five SVD
# components, coloured by the minPts = 175 labels (label 0 is drawn in grey).
png(file = "DBSCAN175 plots.png")
op <- par(mfrow = c(2, 5))
point_colours <- c("#66666666", rainbow(41))[dbscan175$cluster + 1]
component_pairs <- combn(5, 2)
for (pair in seq_len(ncol(component_pairs))) {
  plot(
    dg_train.svd$u[, component_pairs[1, pair]],
    dg_train.svd$u[, component_pairs[2, pair]],
    xlab = "", ylab = "",
    col = point_colours, pch = 19, cex = 0.5
  )
}
par(op)
dev.off()
null device 
          1 
# Write a 2 x 5 grid of scatter plots, one per pair of the first five SVD
# components, coloured by the minPts = 50 labels (label 0 is drawn in grey).
#
# The palette is sized from the largest label. A fixed 41-colour palette is
# too short for this run, and labels beyond it would index past the end of the
# colour vector, get an NA colour and not be drawn at all.
png(file = "DBSCAN50 plots.png")
op <- par(mfrow = c(2, 5))
palette50 <- c("#66666666", rainbow(max(dbscan50$cluster)))
point_colours <- palette50[dbscan50$cluster + 1]
component_pairs <- combn(5, 2)
for (pair in seq_len(ncol(component_pairs))) {
  plot(
    dg_train.svd$u[, component_pairs[1, pair]],
    dg_train.svd$u[, component_pairs[2, pair]],
    xlab = "", ylab = "",
    col = point_colours, pch = 19, cex = 0.5
  )
}
par(op)
dev.off()
null device 
          1 
# Write a 2 x 5 grid of scatter plots, one per pair of the first five SVD
# components, coloured by the minPts = 30 labels (label 0 is drawn in grey).
#
# The palette is sized from the largest label. A fixed 41-colour palette is
# too short for this run, and labels beyond it would index past the end of the
# colour vector, get an NA colour and not be drawn at all.
png(file = "DBSCAN30 plots.png")
op <- par(mfrow = c(2, 5))
palette30 <- c("#66666666", rainbow(max(dbscan30$cluster)))
point_colours <- palette30[dbscan30$cluster + 1]
component_pairs <- combn(5, 2)
for (pair in seq_len(ncol(component_pairs))) {
  plot(
    dg_train.svd$u[, component_pairs[1, pair]],
    dg_train.svd$u[, component_pairs[2, pair]],
    xlab = "", ylab = "",
    col = point_colours, pch = 19, cex = 0.5
  )
}
par(op)
dev.off()
null device 
          1 

Let’s compare the first plot for each of the five clusterings we performed.

# Plot component 1 against component 2 once per DBSCAN run, coloured by label
# (label 0 is drawn in grey).
#
# The title is built from each fitted object, so the reported minPts and label
# count cannot drift from the actual result. The count is the number of
# distinct labels and therefore includes label 0 when it occurs. The palette is
# sized from the largest label so that no label maps to an NA colour.
dbscan_fits <- list(dbscan30, dbscan50, dbscan175, dbscan200, dbscan400)

for (fit in dbscan_fits) {
  labels <- fit$cluster
  label_palette <- c("#66666666", rainbow(max(labels)))
  plot(
    dg_train.svd$u[, 1],
    dg_train.svd$u[, 2],
    xlab = "", ylab = "",
    main = paste0(
      "minPts = ", fit$minPts,
      ", Clusters = ", length(unique(labels))
    ),
    col = label_palette[labels + 1], pch = 19, cex = 0.5
  )
}

Thus when clustering using larger minPts, we appear to cluster the majority of points into cluster 0 i.e the grey block in the figures. We get a merging of clusters between 30 min points and 200 min points. When performing mean imputation, we can thus either work with a large amount of clusters i.e. when the minPts is small ~30 or fewer clusters but have the majority of points in a single cluster i.e. when the minPts is large ~175.

Imputation

We’ll use the clustering with 200 min points. This allows us to keep close to the way that Alex has done it with 15 clusters and ensures that we’re likely enough to have data in each cluster to allow us to impute missingness.

# Print the summary of the minPts = 200 fit (label sizes and noise count).
dbscan200
DBSCAN clustering for 204249 objects.
Parameters: eps = 1e-04, minPts = 200
The clustering contains 16 cluster(s) and 123313 noise points.

     0      1      2      3      4      5      6      7      8      9     10     11 
123313  48603  12257   6099   2422    858   6505    824   1582    309    211    310 
    12     13     14     15     16 
   200    254    157    145    200 

Available fields: cluster, eps, minPts
# Copy the normalised training features and add the minPts = 200 label of
# every row as a `cluster` column.
dg_train.clustered <- cbind(data.frame(dg_train), cluster = dbscan200$cluster)

dg_train.clustered

# Do the same for the three columns that contain missing values.
dg_train_missing.clustered <- cbind(
  data.frame(dg_train_missing),
  cluster = dbscan200$cluster
)

dg_train_missing.clustered

We need to check whether we can perform imputation. If all the values in a cluster are NA then we won’t be able to perform the imputation for it and may therefore need to consider changing the clustering.

# Report every cluster label whose `duration` values are all NA, i.e. clusters
# for which no mean can be computed.
# The labels are taken from the data rather than assumed to be 0..16, and the
# test is an explicit all(is.na(.)) instead of comparing a ratio with 1.
for (i in sort(unique(dg_train_missing.clustered$cluster))) {
  in_cluster <- dg_train_missing.clustered$cluster == i
  if (all(is.na(dg_train_missing.clustered$duration[in_cluster]))) {
    print(paste0("Cluster ", i, " has no non na value(s)"))
  }
}
[1] "Cluster 4 has no non na value(s)"

We see here that all but one cluster have values that allow us to impute. Cluster 4 has all NA values and thus we can’t use mean imputation to figure out what these values should be. We’ll consider other ways of imputing solely for this cluster after we’ve imputed for the other clusters. Note that none of the other tested clusterings results in a better option: they all produce more clusters with no values, e.g. dbscan400 has 2 clusters with full missingness and dbscan30 has 22 clusters with full missingness.

# One data frame per label 0..16, each holding that label's rows.
cluster_frames <- lapply(0:16, function(label) {
  dg_train_missing.clustered[dg_train_missing.clustered$cluster == label, ]
})

# Expose them as cluster0 ... cluster16.
for (pos in seq_along(cluster_frames)) {
  assign(paste0("cluster", pos - 1), cluster_frames[[pos]])
}

# Concatenating data frames with c() yields one flat list of columns:
# four consecutive entries (duration, orig_bytes, resp_bytes, cluster) per label.
clusters <- do.call(c, cluster_frames)

We’ll plot the first cluster in a box plot to visualise outliers and also as a comparison for later.

# Melt cluster0 into long form; the plotting code below reads its `variable`
# and `value` columns.
meltData <- melt(cluster0)
Using  as id variables
# Base plot for the cluster-0 box plots. The title has to be added to the plot
# object with `+`; calling ggtitle() on its own only prints a labels object and
# leaves the plot untitled.
p <- ggplot(meltData, aes(factor(variable), value)) +
  ggtitle("Cluster: 0")
$title
[1] "Cluster: 0"

attr(,"class")
[1] "labels"
# One box plot per melted variable, each panel with its own axis scales.
p +
  geom_boxplot() +
  facet_wrap(~variable, scales = "free")


# Per-cluster mean imputation of duration, orig_bytes and resp_bytes.
#
# For every label present in dg_train_missing.clustered this
#   * records the column means (NAs ignored) in dmeans / obmeans / rbmeans,
#   * replaces the NAs of each column with that cluster's mean, and
#   * stores the imputed rows as cluster<label> (cluster0, cluster1, ...).
# A cluster with no observed value has a NaN mean and is filled with NaN.
#
# The replacement is vectorised per column; assigning data-frame cells one at
# a time is extremely slow for clusters with many rows. Labels come from the
# data instead of a fixed count.
impute_cols <- c("duration", "orig_bytes", "resp_bytes")
cluster_ids <- sort(unique(dg_train_missing.clustered$cluster))

dmeans <- numeric(length(cluster_ids))
obmeans <- numeric(length(cluster_ids))
rbmeans <- numeric(length(cluster_ids))

for (idx in seq_along(cluster_ids)) {
  id <- cluster_ids[idx]
  a <- dg_train_missing.clustered[dg_train_missing.clustered$cluster == id, ]
  rownames(a) <- NULL
  m <- colMeans(a[impute_cols], na.rm = TRUE)
  print(paste0("Currently working on cluster ", id, "."))

  dmeans[idx] <- m[["duration"]]
  obmeans[idx] <- m[["orig_bytes"]]
  rbmeans[idx] <- m[["resp_bytes"]]

  for (col in impute_cols) {
    gaps <- is.na(a[[col]])
    a[[col]][gaps] <- m[[col]]
  }
  assign(paste0("cluster", id), a)
}
[1] "Currently working on cluster 0."
[1] "Currently working on cluster 1."
[1] "Currently working on cluster 2."
[1] "Currently working on cluster 3."
[1] "Currently working on cluster 4."
[1] "Currently working on cluster 5."
[1] "Currently working on cluster 6."
[1] "Currently working on cluster 7."
[1] "Currently working on cluster 8."
[1] "Currently working on cluster 9."
[1] "Currently working on cluster 10."
[1] "Currently working on cluster 11."
[1] "Currently working on cluster 12."
[1] "Currently working on cluster 13."
[1] "Currently working on cluster 14."
[1] "Currently working on cluster 15."
[1] "Currently working on cluster 16."

We’ll finally get the table of means that we wanted. This gives us the mean of each missing column and the cluster they’re from.

# Assemble the per-cluster means into one table and render it to a PDF.
means <- data.frame(
  "cluster" = 0:16,
  "duration means" = dmeans,
  "origin_bytes means" = obmeans,
  "resp_bytes means " = rbmeans
)
pdf("means.pdf", height = 11, width = 10)
grid.table(means)
dev.off()
null device 
          1 
# Print the table of per-cluster means.
means

Finally, we’ll test to see how this imputation has worked. We’ll look at the error, i.e. the difference between the means produced from the clustered training data and those from the test data we’ll cluster now. We’ll use the same parameters as defined above to maintain consistency - if we were to check these parameters, we should see similar ones since both sets are random samples of the data.

# Decompose the normalised test features and plot two columns of the left
# singular vectors.
dg_test.svd <- svd(dg_test)
i <- 1
j <- 2
plot(
  dg_test.svd$u[, i],
  dg_test.svd$u[, j],
  type = "p",
  col = "#33333311",
  pch = 16,
  cex = 1
)

# Cluster the reduced test data with the same eps and minPts as the training
# run, then print the summary.
dbscan200Test <- dbscan(dg_test.svd$u[, 1:npcs], eps = 0.0001, minPts = 200)
dbscan200Test
DBSCAN clustering for 22695 objects.
Parameters: eps = 1e-04, minPts = 200
The clustering contains 0 cluster(s) and 22695 noise points.

    0 
22695 

Available fields: cluster, eps, minPts

So what we find is that DBSCAN on the test split forms no clusters at all: all 22,695 data points are labelled as noise (cluster 0). We’ll have a look at what result this gives, but this ultimately looks like it won’t result in any fruitful comparison to see how well DBSCAN performed.

# Attach the test-run labels to the test features and to the test columns
# that contain missing values.
dg_test.clustered <- cbind(data.frame(dg_test), cluster = dbscan200Test$cluster)
dg_test_missing.clustered <- cbind(
  data.frame(dg_test_missing),
  cluster = dbscan200Test$cluster
)

dg_test_missing.clustered

# Column means over all test rows, ignoring NAs (one row per column).
cluster0testmeans <- as.data.frame(colMeans(dg_test_missing.clustered, na.rm = TRUE))

cluster0testmeans
NA
# Relative difference, 1 - test / train, between the first row of the training
# means table and the test means, for columns 2..4 of that table.
cluster0trainmeans <- means[1, ]
diffmeans <- list()

for (col_idx in seq(2, 4)) {
  train_mean <- cluster0trainmeans[col_idx]
  # The test means are stored one per row, shifted by one position.
  test_mean <- cluster0testmeans[col_idx - 1, ]
  diffmeans <- c(diffmeans, 1 - (test_mean / train_mean))
}
as.data.frame(diffmeans)

Thus we have a very large difference in the means of our training data and the means of test data and thus we may assume that DBSCAN in this case doesn’t perform very well.

# Save the table of per-cluster means as CSV. R has no write.txt();
# write.csv() is the base writer that matches the .csv target. Row names are
# omitted because the table already has a `cluster` column.
write.csv(means, "mattmeans.csv", row.names = FALSE)
Error in write.txt(means, "mattmeans.csv") : 
  could not find function "write.txt"

The final step is to impute the data into the original data frame. The below function is VERY slow (I’m not sure how to optimise it) but it does get the correct output.

# Add duration, orig_bytes and resp_bytes to the training frame with their NAs
# replaced by the mean of the row's DBSCAN cluster.
# dg_train_missing.clustered still holds the raw values (NAs included), so
# copying its columns across unchanged would not impute anything.
# Rows whose cluster has no observed value at all are left as NA.
impute_cols <- c("duration", "orig_bytes", "resp_bytes")
cluster_of_row <- dg_train_missing.clustered$cluster

for (col in impute_cols) {
  values <- as.numeric(dg_train_missing.clustered[[col]])
  # Mean of the observed values of this column within each row's cluster.
  cluster_mean <- ave(
    values, cluster_of_row,
    FUN = function(v) mean(v, na.rm = TRUE)
  )
  fill <- is.na(values) & !is.nan(cluster_mean)
  values[fill] <- cluster_mean[fill]
  dg_train[[col]] <- values
}
dg_train

For reference, the header of Alex’s data (which matches the data above) is given below.

# Load the reference image from disk and display it.
im <- load.image("AlexMeans.png")
plot(im)

Finally, we’re going to visualise the clustering using t-SNE projection. The main aim of the project was to perform imputation but being able to actually visualise the clusters is important too. The plots above help us understand the data but are hard to infer anything from. We’ll visualise the DBSCAN200 data below.

# Project the clustered training frame to two dimensions with t-SNE and plot
# it, coloured by the minPts = 200 labels (label 0 in grey).
# NOTE(review): dg_train.clustered includes the `cluster` label column, so the
# labels are part of the t-SNE input - confirm this is intended.
rtsne_out <- Rtsne(as.matrix(dg_train.clustered), pca = FALSE, verbose = TRUE, check_duplicates = FALSE)
plot(rtsne_out$Y, asp = 1, pch = 20, 
     cex = 0.1, cex.axis = 1.25, cex.lab = 1.25, cex.main = 1.5, 
     xlab = "t-SNE dimension 1", ylab = "t-SNE dimension 2", 
     main = "2D t-SNE projection",col=c("#66666666",rainbow(41))[dbscan200$cluster+1])

We’ll also look at a plot using umap.

# Plot the UMAP projection, coloured by the minPts = 200 labels (label 0 in
# grey).
# NOTE(review): `data.umap` is not assigned anywhere in the visible code -
# presumably it is computed in a chunk that is not shown; verify before running.
plot(data.umap, asp = 1, pch = 20, 
     cex = 0.2, cex.axis = 1.25, cex.lab = 1.25, cex.main = 1.5, 
     main = "2D umap projection",col=c("#66666666",rainbow(41))[dbscan200$cluster+1])

# Same plot call repeated.
plot(data.umap, asp = 1, pch = 20, 
     cex = 0.2, cex.axis = 1.25, cex.lab = 1.25, cex.main = 1.5, 
     main = "2D umap projection",col=c("#66666666",rainbow(41))[dbscan200$cluster+1])

The difference is startling. Whereas the tsne plot looks fairly jumbled with clusters, with no clusters actually seeming to appear and more scattering within it, the umap plot has very discrete clusters and gives a much better visualisation. We get some scattering between clusters with grey/red points occasionally showing up where we don’t necessarily expect them but overall the clusters look very independent. With this in mind, I would presume that the clustering with a minimum points of 200 does produce valid clusters and is a good way to perform imputation based on clusters, despite some of the earlier issues that may still be valid. Additionally, the umap projection is incredibly fast compared to the tsne projection and therefore is computationally more useful.

To finish, we’ll look at Alex’s clustering compared to ours.

# Load the comparison image from disk and display it.
im <- load.image("Bus_Seat.png")
plot(im)

Alex’s clustering doesn’t have the large majority cluster that ours produces, which may be the reason that his results are more accurate. The initialisation of random means and therefore random clusters may allow the k-means algorithm to, in this case, work better than the DBSCAN algorithm. It is also worth noting that Alex doesn’t get a cluster full of NAs as we do.

References:

  1. Data from SecRepo

  2. Converting categorical variables

  3. Adding columns to data frames

  4. Finding Unique Values

  5. DBSCAN on flowers

  6. Saving Plots (credit must also be given to Alex for helping me out a huge amount here)

  7. DBSCAN Parameter Estimation

  8. Finding the knee in kNNDist

  9. Silhouette Score introduction

  10. Error with silhouette score

  11. Silhouette Function

  12. Assign function for creating multiple data frames at once

  13. Exporting a data frame as a pdf

  14. Plotting multiple box plots using ggplot

  15. Using the uwot package

LS0tCnRpdGxlOiAiQXNzZXNzbWVudCAyIC0gTWF0dCIKb3V0cHV0OgogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICBodG1sX25vdGVib29rOiBkZWZhdWx0Ci0tLQojIE92ZXJ2aWV3CkluIHRoaXMgYXNzZXNzbWVudCB3ZSBhaW0gdG8gdXNlIHRoZSBNQUNDREMgY29ubiBkYXRhIHRvIHBlcmZvcm0gZGF0YSBhbmFseXNpcyBhbmQgbW9kZWxsaW5nLgpGaXJzdCB3ZSdsbCBpbXBvcnQgYW55IGxpYnJhcmllcyB3ZSBpbnRlbmQgdG8gdXNlLgoKYGBge3J9CiNpbnN0YWxsLnBhY2thZ2VzKCJkYnNjYW4iKQojaW5zdGFsbC5wYWNrYWdlcygiY2x1c3RlciIpCiNpbnN0YWxsLnBhY2thZ2VzKCJyZXNoYXBlIikKI2luc3RhbGwucGFja2FnZXMoImdncGxvdDIiKXMKI2luc3RhbGwucGFja2FnZXMoImdyaWRFeHRyYSIpCiNpbnN0YWxsLnBhY2thZ2VzKCJNYXRyaXgiKQojaW5zdGFsbC5wYWNrYWdlcygiaXJsYmEiKQojaW5zdGFsbC5wYWNrYWdlcygiUnRzbmUiKQojaW5zdGFsbC5wYWNrYWdlcygidW1hcCIpCiNpbnN0YWxsLnBhY2thZ2VzKCJ1d290IikKI2luc3RhbGwucGFja2FnZXMoImltYWdlciIpCmBgYAoKYGBge3J9CmxpYnJhcnkoZGJzY2FuKQpsaWJyYXJ5KGNsdXN0ZXIpCmxpYnJhcnkocmVzaGFwZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGdyaWRFeHRyYSkKbGlicmFyeShNYXRyaXgpCmxpYnJhcnkoaXJsYmEpCmxpYnJhcnkoUnRzbmUpCmxpYnJhcnkodW1hcCkKbGlicmFyeSh1d290KQpsaWJyYXJ5KGltYWdlcikKYGBgCgpXZSBmaXJzdCBtdXN0IGltcG9ydCB0aGUgZGF0YS4KCmBgYHtyfQpteWRhdGEgPC0gcmVhZC5jc3YoIk1BQy5jc3YiKQpteWRhdGEgPC0gZGF0YS5mcmFtZShteWRhdGEpCmBgYAoKYGBge3J9Cm15ZGF0YQpgYGAKV2UgZmlyc3Qgd2FudCB0byBsb29rIGZvciBtaXNzaW5nIGRhdGEuIFNlcnZpY2UsIGR1cmF0aW9uLCBvcmlnX2J5dGVzLCByZXNwX2J5dGVzIGFuZCBsb2NhbF9vcmlnIGFsbCBzZWVtIHRvIGhhdmUgbWlzc2luZyBkYXRhIGluIHRoZW0gc28gd2Ugd2lsbCBzZWUgd2hhdCBwZXJjZW50YWdlLgoKYGBge3J9Cm10YWIwPWRhdGEuZnJhbWUoCiAgICBtaXNzaW5nZHVyYXRpb249aXMubmEobXlkYXRhWywiZHVyYXRpb24iXSksCiAgICBwcm90bz1teWRhdGFbLCJwcm90byJdKQptdGFiMD10YWJsZShtdGFiMCkKKGFwcGx5KG10YWIwLDIsZnVuY3Rpb24oeCl4L3N1bSh4KSkpCgptdGFiMT1kYXRhLmZyYW1lKAogICAgbWlzc2luZ19vcmlnX2J5dGVzPWlzLm5hKG15ZGF0YVssIm9yaWdfYnl0ZXMiXSksCiAgICBwcm90bz1teWRhdGFbLCJwcm90byJdKQptdGFiMT10YWJsZShtdGFiMSkKKGFwcGx5KG10YWIxLDIsZnVuY3Rpb24oeCl4L3N1bSh4KSkpCgptdGFiMj1kYXRhLmZyYW1lKAogICAgbWlzc2luZ19yZXNwX2J5dGVzPWlzLm5hKG15ZGF0YVssInJlc3BfYnl0ZXMiXSksCiAgICBwcm90bz1teWRhdGFbLCJwcm90byJdKQptdGFiMj10YWJsZShtdGFiMikKKGFwcGx5
KG10YWIyLDIsZnVuY3Rpb24oeCl4L3N1bSh4KSkpCgptdGFiMz1kYXRhLmZyYW1lKAogICAgbWlzc2luZ19sb2NhbF9vcmlnPWlzLm5hKG15ZGF0YVssImxvY2FsX29yaWciXSksCiAgICBwcm90bz1teWRhdGFbLCJwcm90byJdKQptdGFiMz10YWJsZShtdGFiMykKKGFwcGx5KG10YWIzLDIsZnVuY3Rpb24oeCl4L3N1bSh4KSkpCmBgYApUaHVzIHdlIGFyZSBtaXNzaW5nIHRoZSBsb2NhbF9vcmlnIGZlYXR1cmUgZm9yIGV2ZXJ5IGRhdGEgcG9pbnQgaW4gdGhlIGRhdGEgc2V0LiBXZSBtYXkgdGhlbiBjb25zaWRlciBkcm9wcGluZyB0aGlzIGVudGlyZSBjb2x1bW4gYXMgaXQgc2VydmVzIG5vIHVzZSB0byB1cyBhbmQgd2UgY2Fubm90IGltcHV0ZSB0aGUgZGF0YSB3aXRob3V0IHByaW9yIGtub3dsZWRnZSBvZiB0aGUgZGF0YSBzZXQgYW5kIHdoYXQgaXQgc2hvdWxkIGxvb2sgbGlrZS4gVGhlIGR1cmF0aW9uLCBvcmlnX2J5dGVzIGFuZCByZXNwX2J5dGVzIGFsbCBhcHBlYXIgdG8gYmUgbWlzc2luZyBleGFjdGx5IHRoZSBzYW1lIGRhdGEgLSBvbiBmdXJ0aGVyIGFuYWx5c2lzLCB3ZSBzZWUgdGhhdCB3aGVuZXZlciBvbmUgaXMgbWlzc2luZywgYWxsIHRocmVlIGFyZSBtaXNzaW5nLiAKClNvbWUgaW5pdGlhbCBkYXRhIGNsZWFuc2luZyB3aWxsIGNvbWUgZnJvbSByZW1vdmluZyB0aGUgWCBjb2x1bW4gYW5kIHRoZSB0cyBjb2x1bW4uIFRoZSBYIGNvbHVtbiBpcyBwcm9kdWNlZCBieSB0aGUgc2FtcGxpbmcgYW5kIHNpbmNlIHdlIGhhdmUgYSByYW5kb20gc2FtcGxlIG9mIHRoZSBkYXRhLCB0aGUgdHMgcHJvdmlkZXMgbm8gcmVhbCBpbmZvcm1hdGlvbiBvbiB0aGUgZGF0YS4KCmBgYHtyfQp1bmlxdWVfdWlkIDwtIG15ZGF0YVshZHVwbGljYXRlZChteWRhdGFbLGMoJ3VpZCcpXSksXQp1bmlxdWVfdWlkCmBgYApUaHVzIGFsbCBvdXIgdWlkJ3MgYXJlIHVuaXF1ZSBhbmQgdGhlcmVmb3JlIHdvbnQgcHJvdmlkZSB1cyB3aXRoIGFueSBleHRyYSBpbmZvcm1hdGlvbiBlaXRoZXIgc2luY2UgdGhleSB3aWxsIGJlIHVuY29ycmVsYXRlZCB3aXRoIHRoZSByZXN0IG9mIHRoZSBkYXRhLiBUaGlzIGlzIHRoZSBvbmx5IGNvbHVtbiB3aXRoIHRoaXMgdHJhaXQsIGFuZCBhbGwgb3RoZXIgY29sdW1ucyBoYXZlIHZhbHVlcyB3aGljaCBvY2N1ciBtb3JlIHRoYW4gb25jZSBzbyB3ZSBjYW4gZHJvcCB0aGUgdWlkIGNvbHVtbiB0b28uCgpgYGB7cn0KZHJvcF9jb2x1bW5zIDwtIGMoIlgiLCJ0cyIsImxvY2FsX29yaWciLCJ1aWQiKQpteWRhdGEgPC0gbXlkYXRhWywgIW5hbWVzKG15ZGF0YSkgJWluJSBkcm9wX2NvbHVtbnNdCmBgYAoKYGBge3J9CmhlYWQobXlkYXRhKQpgYGAKClNvIHdlIGhhdmUgcmVtb3ZlZCB0aGUgY29sdW1ucyB0aGF0IGRpZG4ndCBwcm92aWRlIHVzIHdpdGggYW55IGV4dHJhIGluZm9ybWF0aW9uLiBXZSB3aWxsIG5vdyBleHRyYWN0IHRoZSBkYXRhIHdlIHdpbGwgdXNlIGZvciBEQlNDQU4gdG8gY3JlYXRlIGNsdXN0ZXJzLiBU
aGUgZm9sbG93aW5nIGNvZGUgaXMgcHVsbGVkIGZyb20gQWxleCdzIHdvcmtib29rIGFuZCBhbGxvd3MgdXMgdG8gcHVsbCBvdXQgNyBvZiB0aGUgZmVhdHVyZXMgdG8gdXNlIGZvciBEQlNDQU4gYW5kIGVuc3VyZXMgYWxsIGVsZW1lbnRzIGFyZSBudW1lcmljLgoKYGBge3J9CiMgbWlzcy5tZSA8LSB2ZWN0b3IobGVuZ3RoID0gbnJvdyhteWRhdGEpKQojIG1pc3MubWUgPC0gcmVwKDAsIHRpbWVzID0gbnJvdyhteWRhdGEpKQojIGZvcihpIGluIDE6bnJvdyhteWRhdGEpKSB7CiMgCWlmKGlzLm5hKG15ZGF0YSRkdXJhdGlvbltpXSkpIHsgbWlzcy5tZVtpXSA8LSAxIH0KIyAJfQojIHN0cihteWRhdGEpCiMgbXlkYXRhLmdvb2QgPC0gYXMuZGF0YS5mcmFtZShjYmluZChpZC5vcmlnX3AgPSBteWRhdGEkaWQub3JpZ19wLCBpZC5yZXNwX3AgPSBteWRhdGEkaWQucmVzcF9wLCAKIyBvcmlnX3BrdHMgPSBteWRhdGEkb3JpZ19wa3RzLCBvcmlnX2lwX2J5dGVzID0gbXlkYXRhJG9yaWdfaXBfYnl0ZXMsIAojIHJlc3BfcGt0cyA9IG15ZGF0YSRyZXNwX3BrdHMsIHJlc3BfaXBfYnl0ZXMgPSBteWRhdGEkcmVzcF9pcF9ieXRlcykpCiMgbXlkYXRhLmdvb2Q8LSBjYmluZChteWRhdGEuZ29vZCwgbWlzcy5tZSkKIyBoZWFkKG15ZGF0YS5nb29kKQojIHN0cihteWRhdGEuZ29vZCkgIyBTaG91bGQgYmUgb25seSBpbnRzIGFuZCBudW1zCiMgCiMgZm9yKGkgaW4gMTpuY29sKG15ZGF0YS5nb29kKSkgeyBteWRhdGEuZ29vZFssaV0gPC0gYXMubnVtZXJpYyhteWRhdGEuZ29vZFssaV0pIH0KIyBzdHIobXlkYXRhLmdvb2QpCQkjIyBBbGwgc2hvdWxkIGJlIG51bXMgbm93CiMgIyBzdW0obXlkYXRhLmdvb2QkbWlzcy5tZSkvbnJvdyhteWRhdGEuZ29vZCkgIyMgODIuNyUgbWlzc2luZwoKYGBgCgpUaGUgZGF0YSBjbGVhbnNpbmcgQWxleCBwZXJmb3JtZWQgd2Fzbid0IHZlcnkgY29uZHVjaXZlIHRvIGFsbG93aW5nIG1lIHRvIGltcHV0ZSBkYXRhIHNvIEkgd2lsbCB1c2UgdGhlIGJhc2lzIG9mIGhpcyBidXQgbWFrZSBzb21lIHNtYWxsIGNoYW5nZXMuCmBgYHtyfQpteWRhdGEuZ29vZCA8LSBhcy5kYXRhLmZyYW1lKGNiaW5kKGlkLm9yaWdfcCA9IG15ZGF0YSRpZC5vcmlnX3AsIGlkLnJlc3BfcCA9IG15ZGF0YSRpZC5yZXNwX3AsIG9yaWdfcGt0cyA9IG15ZGF0YSRvcmlnX3BrdHMsIG9yaWdfaXBfYnl0ZXMgPSBteWRhdGEkb3JpZ19pcF9ieXRlcyxyZXNwX3BrdHMgPSBteWRhdGEkcmVzcF9wa3RzLCByZXNwX2lwX2J5dGVzID0gbXlkYXRhJHJlc3BfaXBfYnl0ZXMpKQoKbXlkYXRhLmdvb2QKYGBgCgpJIGRvbnQgd2FudCB0byBkcm9wIGFueSBkYXRhIHRoYXQgbWF5IGJlIGltcG9ydGFudCBzbyBJJ2xsIGFsc28gdXNlIHRoZSBwcm90b2NvbCwgY29ubmVjdGlvbiBzdGF0ZSBhbmQgaGlzdG9yeSBmZWF0dXJlcyBpbiBteSBhbmFseXNpcy4KCmBgYHtyfQpwcm90byA8LSBhcy5mYWN0b3IoYyhteWRhdGEkcHJvdG8pKQpwcm90byA8LSB1bmNsYXNzKHBy
b3RvKQoKY29ubl9zdGF0ZSA8LSBhcy5mYWN0b3IoYyhteWRhdGEkY29ubl9zdGF0ZSkpCmNvbm5fc3RhdGUgPC0gdW5jbGFzcyhjb25uX3N0YXRlKQoKaGlzdG9yeSA8LSBhcy5mYWN0b3IoYyhteWRhdGEkaGlzdG9yeSkpCmhpc3RvcnkgPC0gdW5jbGFzcyhoaXN0b3J5KQoKbXlkYXRhLmdvb2QkcHJvdG8gPC0gcHJvdG8KbXlkYXRhLmdvb2QkY29ubl9zdGF0ZSA8LSBjb25uX3N0YXRlCm15ZGF0YS5nb29kJGhpc3RvcnkgPC0gaGlzdG9yeQoKZm9yKGkgaW4gMTpuY29sKG15ZGF0YS5nb29kKSkgeyBteWRhdGEuZ29vZFssaV0gPC0gYXMubnVtZXJpYyhteWRhdGEuZ29vZFssaV0pIH0KCm15ZGF0YS5nb29kCmBgYAoKYGBge3J9CmRhdGFfbWlzc2luZyA8LSBhcy5kYXRhLmZyYW1lKGNiaW5kKGR1cmF0aW9uID0gbXlkYXRhJGR1cmF0aW9uLCBvcmlnX2J5dGVzID0gbXlkYXRhJG9yaWdfYnl0ZXMsIHJlc3BfYnl0ZXMgPSBteWRhdGEkcmVzcF9ieXRlcykpCgpkYXRhX21pc3NpbmcKYGBgClRoZSBiZWxvdyBjb2RlIGlzIEFsZXgncyBtZXRob2QgZm9yIDEwLWZvbGQgQ1YuIFNpbmNlIHdlIHJhbmRvbWx5IHNhbXBsZWQgdGhlIGludGlhbCBkYXRhIHNldCwgdGFraW5nIHRoZSB0b3AgOTAlIG9mIHRoZSBkYXRhIGZyYW1lIHdlIG5vdyBoYXZlIGlzIHN0aWxsIHRha2luZyBhIHJhbmRvbSBzdWJzZXQgc28gcmFuZG9taXNpbmcgdGhlIGRhdGEgcHVsbGVkIGZvciB0aGUgdHJhaW5pbmcvdGVzdGluZyBkYXRhIHNldCB3b250IGNoYW5nZSB0aGUgYWZmZWN0cy4gRG9pbmcgdGhpcyBsaWtlIHRoaXMgbWFrZXMgdGhlIGxhdHRlciBtZWFuIGltcHV0YXRpb24gbXVjaCBzaW1wbGVyLgoKYGBge3J9CiMgCSMjIFdlJ2xsIGRvIDEwLWZvbGQgQ1YgYW5kIHRoZW4gYXBwbHkgREJTQ0FOLCB0cmFpbmluZyBvbiA5MCUKIyBkZyA8LSBteWRhdGEuZ29vZAojIHJhbiA8LSBzYW1wbGUoMTpucm93KGRnKSwgMC45ICogbnJvdyhkZykpCiMgbm9yIDwtZnVuY3Rpb24oeCkgeyAoeCAtbWluKHgpKS8obWF4KHgpLW1pbih4KSkgICB9CiMgZGdfbm9ybSA8LSBhcy5kYXRhLmZyYW1lKGxhcHBseShkZywgbm9yKSkKIyAJIyBoZWFkKGRnX25vcm0pCiMgCiMgZGdfdHJhaW4gPC0gZGdfbm9ybVtyYW4sXSAJIyMgZXh0cmFjdCB0cmFpbmluZyBzZXQKIyBkZ190ZXN0IDwtIGRnX25vcm1bLXJhbixdICAgCSMjIGV4dHJhY3QgdGVzdGluZyBzZXQKIyBkZ190YXJnZXRfY2F0IDwtIGRnW3JhbiwgbmNvbChkZyldCiMgZGdfdGVzdF9jYXQgPC0gZGdbLXJhbiwgbmNvbChkZyldCmBgYAoKYGBge3J9CmRnX3RyYWluIDwtIG15ZGF0YS5nb29kWzE6cm91bmQoMC45Km5yb3cobXlkYXRhLmdvb2QpKSwgXQpkZ190ZXN0IDwtIG15ZGF0YS5nb29kW3RhaWwoMTpucm93KG15ZGF0YS5nb29kKSwgMC4xKm5yb3cobXlkYXRhLmdvb2QpKSwgXQoKZGdfdHJhaW5fbWlzc2luZyA8LSBkYXRhX21pc3NpbmdbMTpyb3VuZCgwLjkqbnJvdyhkYXRhX21pc3NpbmcpKSwgXQpkZ190ZXN0X21p
c3Npbmc8LSBkYXRhX21pc3NpbmdbdGFpbCgxOm5yb3coZGF0YV9taXNzaW5nKSwgMC4xKm5yb3coZGF0YV9taXNzaW5nKSksIF0KCm5vciA8LWZ1bmN0aW9uKHgpeyAoeCAtbWluKHgpKS8obWF4KHgpLW1pbih4KSkgICB9CmRnX3RyYWluIDwtIGFzLmRhdGEuZnJhbWUobGFwcGx5KGRnX3RyYWluLCBub3IpKQpkZ190ZXN0IDwtIGFzLmRhdGEuZnJhbWUobGFwcGx5KGRnX3Rlc3QsIG5vcikpCmBgYAoKIyMgU1ZECgpOb3cgd2UgY2FuIGxvb2sgYXQgcnVubmluZyBEQlNDQU4gb24gb3VyIGRhdGEuIFdlIGZpcnN0IG5lZWQgdG8gcGVyZm9ybSBQQ0EgdG8gZmlndXJlIG91dCBob3cgbWFueSBwcmluY2lwbGUgY29tcG9uZW50cyB0byB1c2UgaW4gREJTQ0FOLgoKYGBge3J9CmRnX3RyYWluLnN2ZCA8LSBzdmQoZGdfdHJhaW4pCmBgYAoKYGBge3J9CnBsb3QoZGdfdHJhaW4uc3ZkJGQseGxhYj0iRWlnZW52YWx1ZSBpbmRleCIseWxhYj0iRWlnZW52YWx1ZSIsbG9nPSJ5IikKcGxvdChkZ190cmFpbi5zdmQkZCx4bGFiPSJFaWdlbnZhbHVlIGluZGV4Iix5bGFiPSJFaWdlbnZhbHVlIikKYGBgCgpQbG90dGluZyB3aXRoIHRoZSBkaWZmZXJlbnQgYXhpcyBnaXZlcyBhIHN0cmlraW5nIGRpZmZlcmVuY2UuIEknbGwgZm9sbG93IHRoZSBzaW1pbGFyIHBhdGggb2YgdXNpbmcgdGhlIGxvZyBheGlzIGFuZCB0aHVzIHVzaW5nIDUgcHJpbmNpcGFsIGNvbXBvbmVudHMgc2luY2UgdGhpcyBpcyB3aGVyZSB0aGUgZWxib3cgb2NjdXJzLgoKYGBge3J9Cm5wY3MgPSA1CmBgYAoKV2Ugbm93IHBsb3QgdGhlIFBDQSB0byB2aXN1YWxpc2UgdGhlIGNsdXN0ZXJzIGZvcm1lZCBoZXJlLiBXZSdyZSBub3QgcGxvdHRpbmcgYWNjb3JkaW5nIHRvIGFueSBjYXRlZ29yaWNhbCBkYXRhIGkuZS4gbm9ybWFsIHZzIG5vbi1ub3JtYWwgc28gd2UgbWF5IG5vdCBnZXQgdGhhdCBtdWNoIGluZm9ybWF0aW9uIGZyb20gdGhpcy4KCmBgYHtyfQppPTE7aj0yCnBsb3QoZGdfdHJhaW4uc3ZkJHVbLGldLAogICAgIGRnX3RyYWluLnN2ZCR1WyxqXSx0eXBlPSJwIiwKICAgICBjb2w9IiMzMzMzMzMxMSIscGNoPTE2LGNleD0xKQpgYGAKCkFzIGEgcmVmbGVjdGlvbiwgYWxsIHRoZSBjb2RlIGluIHRoaXMgZG9jdW1lbnQgd2FzIGluaXRpYWxseSBydW4gb24gdGhlIHNhbWUgZGF0YSBidXQgd2l0aCB0aGUgbWlzcy5tZSBjb2x1bW4gZnJvbSBBbGV4J3MgY29kZSBhYm92ZSB3aGljaCBjcmVhdGVzIGEgZHJhc3RpYyBkaWZmZXJlbmNlIGluIHRoZSBvdXRwdXQgb2Ygc3ZkLiBJdCByZXN1bHRzIGluIHVzIG5lZWRpbmcgYW4gZXh0cmEgcHJpbmNpcGxlIGNvbXBvbmVudCBhbmQgcmVtb3ZlcyB0aGUgcGFyYWxsZWxvZ3JhbXMgZnJvbSB0aGUgcGxvdCBhYm92ZSAtIHRoZXJlZm9yZSBJIHdvdWxkIGFzc3VtZSB0aGF0ICdtaXNzaW5nbmVzcycgaGFzIGEgcmVzdWx0IG9uIGNsdXN0ZXJzIGFuZCBpcyB0aGVyZWZvcmUgZGVwZW5kZW50IG9uIHdoaWNoIGNsdXN0ZXIgYSBkYXRhIHBvaW50
IGlzIHBsYWNlZCBpbnRvLiBTaW5jZSB3ZSBhcmUgdHJ5aW5nIHRvIGltcHV0ZSB0aGUgbWlzc2luZyBkYXRhIEknbSBnb2luZyB0byB1c2UgY29tcGxldGUgY2FzZSBhbmFseXNpcyBhbmQgcGVyZm9ybSBjbHVzdGVyaW5nIHdpdGhvdXQgcmVmZXJlbmNlIHRvIGFueSBtaXNzaW5nbmVzcy4KCiMjIEZpbmRpbmcgUGFyYW1ldGVycyBmb3IgREJTQ0FOCgpFcHMgc3BlY2lmaWVzIGhvdyBjbG9zZSB0aGUgcG9pbnRzIHNob3VsZCBiZSB0byBlYWNoIG90aGVyIHRvIGZvcm0gYSBjbHVzdGVyLiBJZiB0aGUgZGlzdGFuY2UgaXMgbGVzcyB0aGFuIGVwcywgdGhleSBhcmUgY29uc2lkZXJlZCBuZWlnaGJvdXJzLiBXZSBmaW5kIHRoaXMgbnVtYmVyIGJ5IGZpbmRpbmcgdGhlICdrbmVlJyBpbiB0aGUgcGxvdCBiZWxvdy4gSSBoYXZlIGNob3NlbiB0byB1c2UgMTAgKGRpbSsxKSBuZWlnaGJvdXJzIGhlcmUuCgpgYGB7cn0KdGVzdD1rTk5kaXN0KGRnX3RyYWluLnN2ZCR1WywxOm5wY3NdLCBrID0gMTAsIGFsbD1UUlVFKQp0ZXN0bWluPWFwcGx5KHRlc3QsMSxtaW4pCmBgYAoKYGBge3J9CnBsb3Qoc29ydCh0ZXN0bWluW3Rlc3RtaW4+MWUtOF0pLGxvZz0ieSIpCnRocmVzaGhvbGRzPSBjKDAuMDEsMC4wMDEsMC4wMDAxLDAuMDAwMDEsMC4wMDAwMDEpCmFibGluZShoPWMoMC4wMSwwLjAwMSwwLjAwMDEsMC4wMDAwMSwwLjAwMDAwMSkpCmFibGluZShoPTAuMDAwMSwgY29sPSJyZWQiKQpgYGAKClNvIHdlIGNob29zZSBoPTAuMDAwMSBhcyBvdXIgbGltaXQgc2luY2UgdGhpcyBhbGxvd3MgdXMgdG8gY2FwdHVyZSBtb3N0IG9mIHRoZSBpbmZvcm1hdGlvbiBoZXJlLiBXZSBhbHNvIG5lZWQgdG8gZGVmaW5lIG91ciBtaW5pbXVtIG51bWJlciBvZiBwb2ludHMgdG8gZm9ybSBhIGNsdXN0ZXIuIFRoZSByZWNvbW1lbmRhdGlvbiBpcyB0byB1c2UgbWluUHRzID0gMipkaW0gZm9yIGxhcmdlIGRhdGEgc2V0cyB0byBlbnN1cmUgd2UgZmluZCBzaWduaWZpY2FudCBjbHVzdGVycyBidXQgd2UnbGwgbG9vayBhdCBhIHJhbmdlIHRvIHNlZSB3aGF0IG91dHB1dHMgd2UgY291bGQgZ2V0LiBBcyBhIHJlZmVyZW5jZSwgQWxleCBpcyB1c2luZyAxNSBjbHVzdGVycyBzbyB3ZSdsbCBhaW0gdG8gcmVkdWNlIG91ciBkYXRhIHNldCBkb3duIHRvIHRoYXQgbWFueSBidXQgdGhpcyBpcyBkZXBlbmRlbnQgb24gaG93IHRoYXQgY2x1c3RlcmluZyBsb29rcyBhbmQgcGVyZm9ybXMgZm9yIG1lYW4gaW1wdXRhdGlvbi4KCiMjIERCU0NBTgoKTm93IHdlIGZpbmFsbHkgcGVyZm9ybSBEQlNDQU4uCgpgYGB7cn0KbWluUHRzID0gYygyMCwgMjUsIDMwLCAzNSwgNDAsIDQ1LCA1MCwgNzUsIDEwMCwgMTI1LCAxNTAsIDE3NSwgMjAwLCAyMjUsIDI1MCwgMzAwLCA0MDApCmNsdXN0ZXJjb3VudHMgPSBjKCkKCmZvcih2YWwgaW4gbWluUHRzKSB7CiAgZGJzY2FucmVzID0gZGJzY2FuKGRnX3RyYWluLnN2ZCR1WywxOm5wY3NdLGVwcyA9IDAuMDAwMSxtaW5QdHMgPSB2YWwpCiAgY2x1
c3RlcmNvdW50c1t2YWxdIDwtIChsZW5ndGgodW5pcXVlKGRic2NhbnJlcyRjbHVzdGVyKSkpCn0KYGBgCgpgYGB7cn0KY2x1c3RlcmNvdW50cwpgYGAKClRoZSBhbW91bnQgb2YgY2x1c3RlcnMgd2Ugb2J0YWluIHN0YWJpbGl6ZXMgc29tZXdoZXJlIGFyb3VuZCAyMDAgbWluIHBvaW50cyBzaW5jZSB3ZSBnZXQgaW5mbGVjdGlvbnMgYXJvdW5kIHRoaXMgcG9pbnQuIFdlJ2xsIHZpc3VhbGlzZSB0aGVtIGFsbCB0byBzZWUgd2hhdCB0aGV5IGxvb2sgbGlrZSBhbmQgZ2l2ZSBhIGNvbXBhcmlzb24uIFRvIGNyZWF0ZSBzaW1pbGFyaXR5IGJldHdlZW4gdGhpcyBhbmQgQWxleCdzIGNsdXN0ZXJpbmcgSSBtYXkgdXNlIDIwMCBtaW4gUG9pbnRzIGJ1dCB3ZSdsbCByZWZsZWN0IG9uIHRoaXMgYWZ0ZXIgdGhlIHZpc3VhbGlzYXRpb25zLgoKYGBge3J9CmRic2NhbjQwMCA9IGRic2NhbihkZ190cmFpbi5zdmQkdVssMTpucGNzXSxlcHM9MC4wMDAxLCBtaW5QdHMgPSA0MDApCmRic2NhbjIwMCA9IGRic2NhbihkZ190cmFpbi5zdmQkdVssMTpucGNzXSxlcHMgPSAwLjAwMDEsbWluUHRzID0gMjAwKQpkYnNjYW4xNzUgPSBkYnNjYW4oZGdfdHJhaW4uc3ZkJHVbLDE6bnBjc10sZXBzPTAuMDAwMSxtaW5QdHMgPSAxNzUpCmRic2NhbjUwID0gZGJzY2FuKGRnX3RyYWluLnN2ZCR1WywxOm5wY3NdLGVwcz0wLjAwMDEsbWluUHRzID0gNTApCmRic2NhbjMwID0gZGJzY2FuKGRnX3RyYWluLnN2ZCR1WywxOm5wY3NdLGVwcz0wLjAwMDEsIG1pblB0cyA9IDMwKQpgYGAKCmBgYHtyfQojIHRyeWluZyB0byBjYWxjdWxhdGUgdGhlIHNpbGhvdWV0dGUgc2NvcmUgb2YgdGhpcyBjbHVzdGVyaW5nIHRvIHNlZSBpZiBpdHMgdmFsaWQgb3Igbm90IC0gY3VycmVudGx5IHJlcG9ydHMgRXJyb3I6IFZlY3RvciBtZW1vcnkgZXhoYXVzdGVkIChsaW1pdCByZWFjaGVkPykgLSBJJ3ZlIHRyaWVkIGxvb2tpbmcgaW50byB3b3JrIGFyb3VuZHMgYnV0IGNhbnQgZ2V0IGFueXRoaW5nIHdvcmtpbmcgc28gSSdsbCBsZWF2ZSB0aGlzIGZvciBub3cuCiNzcyA8LSBzaWxob3VldHRlKGRic2NhbjIwMCRjbHVzdGVyLCBkaXN0KGRnX3RyYWluLnN2ZCR1KSkKYGBgCgojIyBQbG90dGluZyByZXN1bHRpbmcgY2x1c3RlcnMKCgpgYGB7cn0KcG5nKGZpbGUgPSAiREJTQ0FONDAwIHBsb3RzLnBuZyIpCm9wPC0gcGFyKG1mcm93PWMoMiw1KSkKZm9yIChrIGluIDE6NCl7CiAgICBhID0gc2VxKGsrMSw1KQogICAgZm9yIChsIGluIGEpewogICAgICAgIGlmKGs9PWwpe25leHR9CiAgICAgICAgcGxvdChkZ190cmFpbi5zdmQkdVssa10sCiAgICAgICAgICAgIGRnX3RyYWluLnN2ZCR1WyxsXSx4bGFiPSIiLAogICAgICAgICAgICB5bGFiPSIiLAogICAgICAgICAgICBjb2w9YygiIzY2NjY2NjY2IixyYWluYm93KDQxKSlbZGJzY2FuNDAwJGNsdXN0ZXIrMV0scGNoPTE5LGNleD0wLjUpCiAgICB9Cn0KcGFyKG9wKQpkZXYub2ZmKCkKYGBgCgoKYGBge3J9CnBuZyhmaWxlID0gIkRCU0NBTjIw
MCBwbG90cy5wbmciKQpvcDwtIHBhcihtZnJvdz1jKDIsNSkpCmZvciAoayBpbiAxOjQpewogICAgYSA9IHNlcShrKzEsNSkKICAgIGZvciAobCBpbiBhKXsKICAgICAgICBpZihrPT1sKXtuZXh0fQogICAgICAgIHBsb3QoZGdfdHJhaW4uc3ZkJHVbLGtdLAogICAgICAgICAgICBkZ190cmFpbi5zdmQkdVssbF0seGxhYj0iIiwKICAgICAgICAgICAgeWxhYj0iIiwKICAgICAgICAgICAgY29sPWMoIiM2NjY2NjY2NiIscmFpbmJvdyg0MSkpW2Ric2NhbjIwMCRjbHVzdGVyKzFdLHBjaD0xOSxjZXg9MC41KQogICAgfQp9CnBhcihvcCkKZGV2Lm9mZigpCmBgYAoKYGBge3J9CnBuZyhmaWxlID0gIkRCU0NBTjE3NSBwbG90cy5wbmciKQpvcDwtIHBhcihtZnJvdz1jKDIsNSkpCmZvciAoayBpbiAxOjQpewogICAgYSA9IHNlcShrKzEsNSkKICAgIGZvciAobCBpbiBhKXsKICAgICAgICBpZihrPT1sKXtuZXh0fQogICAgICAgIHBsb3QoZGdfdHJhaW4uc3ZkJHVbLGtdLAogICAgICAgICAgICBkZ190cmFpbi5zdmQkdVssbF0seGxhYj0iIiwKICAgICAgICAgICAgeWxhYj0iIiwKICAgICAgICAgICAgY29sPWMoIiM2NjY2NjY2NiIscmFpbmJvdyg0MSkpW2Ric2NhbjE3NSRjbHVzdGVyKzFdLHBjaD0xOSxjZXg9MC41KQogICAgfQp9CnBhcihvcCkKZGV2Lm9mZigpCmBgYAoKYGBge3J9CnBuZyhmaWxlID0gIkRCU0NBTjUwIHBsb3RzLnBuZyIpCm9wPC0gcGFyKG1mcm93PWMoMiw1KSkKZm9yIChrIGluIDE6NCl7CiAgICBhID0gc2VxKGsrMSw1KQogICAgZm9yIChsIGluIGEpewogICAgICAgIGlmKGs9PWwpe25leHR9CiAgICAgICAgcGxvdChkZ190cmFpbi5zdmQkdVssa10sCiAgICAgICAgICAgIGRnX3RyYWluLnN2ZCR1WyxsXSx4bGFiPSIiLAogICAgICAgICAgICB5bGFiPSIiLAogICAgICAgICAgICBjb2w9YygiIzY2NjY2NjY2IixyYWluYm93KDQxKSlbZGJzY2FuNTAkY2x1c3RlcisxXSxwY2g9MTksY2V4PTAuNSkKICAgIH0KfQpwYXIob3ApCmRldi5vZmYoKQpgYGAKCmBgYHtyfQpwbmcoZmlsZSA9ICJEQlNDQU4zMCBwbG90cy5wbmciKQpvcDwtIHBhcihtZnJvdz1jKDIsNSkpCmZvciAoayBpbiAxOjQpewogICAgYSA9IHNlcShrKzEsNSkKICAgIGZvciAobCBpbiBhKXsKICAgICAgICBpZihrPT1sKXtuZXh0fQogICAgICAgIHBsb3QoZGdfdHJhaW4uc3ZkJHVbLGtdLAogICAgICAgICAgICBkZ190cmFpbi5zdmQkdVssbF0seGxhYj0iIiwKICAgICAgICAgICAgeWxhYj0iIiwKICAgICAgICAgICAgY29sPWMoIiM2NjY2NjY2NiIscmFpbmJvdyg0MSkpW2Ric2NhbjMwJGNsdXN0ZXIrMV0scGNoPTE5LGNleD0wLjUpCiAgICB9Cn0KcGFyKG9wKQpkZXYub2ZmKCkKYGBgCgoKTGV0cyBjb21wYXJlIHRoZSBmaXJzdCBwbG90IGZvciBlYWNoIG9mIHRoZSBmb3VyIGNsdXN0ZXJpbmcncyB3ZSBwZXJmb21lZC4KCmBgYHtyfQpwbG90KGRnX3RyYWluLnN2ZCR1WywxXSwKICAgICAgICAgICAgZGdfdHJhaW4uc3ZkJHVbLDJdLHhsYWI9IiIsCiAg
ICAgICAgICAgIHlsYWI9IiIsIG1haW49Im1pblB0cyA9IDMwLCBDbHVzdGVycyA9IDY5IiwKICAgICAgICAgICAgY29sPWMoIiM2NjY2NjY2NiIscmFpbmJvdyg0MSkpW2Ric2NhbjMwJGNsdXN0ZXIrMV0scGNoPTE5LGNleD0wLjUpCnBsb3QoZGdfdHJhaW4uc3ZkJHVbLDFdLAogICAgICAgICAgICBkZ190cmFpbi5zdmQkdVssMl0seGxhYj0iIiwKICAgICAgICAgICAgeWxhYj0iIiwgbWFpbj0ibWluUHRzID0gNTAsIENsdXN0ZXJzID0gOTUiLAogICAgICAgICAgICBjb2w9YygiIzY2NjY2NjY2IixyYWluYm93KDQxKSlbZGJzY2FuNTAkY2x1c3RlcisxXSxwY2g9MTksY2V4PTAuNSkKcGxvdChkZ190cmFpbi5zdmQkdVssMV0sCiAgICAgICAgICAgIGRnX3RyYWluLnN2ZCR1WywyXSx4bGFiPSIiLAogICAgICAgICAgICB5bGFiPSIiLCBtYWluPSJtaW5QdHMgPSAxNzUsIENsdXN0ZXJzID0gMzIiLAogICAgICAgICAgICBjb2w9YygiIzY2NjY2NjY2IixyYWluYm93KDQxKSlbZGJzY2FuMTc1JGNsdXN0ZXIrMV0scGNoPTE5LGNleD0wLjUpCnBsb3QoZGdfdHJhaW4uc3ZkJHVbLDFdLAogICAgICAgICAgICBkZ190cmFpbi5zdmQkdVssMl0seGxhYj0iIiwKICAgICAgICAgICAgeWxhYj0iIiwgbWFpbj0ibWluUHRzID0gMjAwLCBDbHVzdGVycyA9IDE3IiwKICAgICAgICAgICAgY29sPWMoIiM2NjY2NjY2NiIscmFpbmJvdyg0MSkpW2Ric2NhbjIwMCRjbHVzdGVyKzFdLHBjaD0xOSxjZXg9MC41KQpwbG90KGRnX3RyYWluLnN2ZCR1WywxXSwKICAgICAgICAgICAgZGdfdHJhaW4uc3ZkJHVbLDJdLHhsYWI9IiIsCiAgICAgICAgICAgIHlsYWI9IiIsIG1haW49Im1pblB0cyA9IDQwMCwgQ2x1c3RlcnMgPSAxMyIsCiAgICAgICAgICAgIGNvbD1jKCIjNjY2NjY2NjYiLHJhaW5ib3coNDEpKVtkYnNjYW40MDAkY2x1c3RlcisxXSxwY2g9MTksY2V4PTAuNSkKYGBgCgpUaHVzIHdoZW4gY2x1c3RlcmluZyB1c2luZyBsYXJnZXIgbWluUHRzLCB3ZSBhcHBlYXIgdG8gY2x1c3RlciB0aGUgbWFqb3JpdHkgb2YgcG9pbnRzIGludG8gY2x1c3RlciAwIGkuZSB0aGUgZ3JleSBibG9jayBpbiB0aGUgZmlndXJlcy4gV2UgZ2V0IGEgbWVyZ2luZyBvZiBjbHVzdGVycyBiZXR3ZWVuIDMwIG1pbiBwb2ludHMgYW5kIDIwMCBtaW4gcG9pbnRzLiBXaGVuIHBlcmZvcm1pbmcgbWVhbiBpbXB1dGF0aW9uLCB3ZSBjYW4gdGh1cyBlaXRoZXIgd29yayB3aXRoIGEgbGFyZ2UgYW1vdW50IG9mIGNsdXN0ZXJzIGkuZS4gd2hlbiB0aGUgbWluUHRzIGlzIHNtYWxsIH4zMCBvciBmZXdlciBjbHVzdGVycyBidXQgaGF2ZSB0aGUgbWFqb3JpdHkgb2YgcG9pbnRzIGluIGEgc2luZ2xlIGNsdXN0ZXIgaS5lLiB3aGVuIHRoZSBtaW5QdHMgaXMgbGFyZ2UgfjE3NS4KCiMjIEltcHV0YXRpb24KCldlJ2xsIHVzZSB0aGUgY2x1c3RlcmluZyB3aXRoIDIwMCBtaW4gcG9pbnRzLiBUaGlzIGFsbG93cyB1cyB0byBrZWVwIGNsb3NlIHRvIHRoZSB3YXkgdGhhdCBBbGV4IGhhcyBkb25l
IGl0IHdpdGggMTUgY2x1c3RlcnMgYW5kIGVuc3VyZXMgdGhhdCB3ZSdyZSBsaWtlbHkgZW5vdWdoIHRvIGhhdmUgZGF0YSBpbiBlYWNoIGNsdXN0ZXIgdG8gYWxsb3cgdXMgdG8gaW1wdXRlIG1pc3NpbmduZXNzLgoKYGBge3J9CmRic2NhbjIwMApgYGAKCgpgYGB7cn0KZGdfdHJhaW4uY2x1c3RlcmVkIDwtIGRhdGEuZnJhbWUoZGdfdHJhaW4pCgpkZ190cmFpbi5jbHVzdGVyZWQkY2x1c3RlciA8LSBkYnNjYW4yMDAkY2x1c3RlcgoKZGdfdHJhaW4uY2x1c3RlcmVkCmBgYAoKYGBge3J9CmRnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkIDwtIGRhdGEuZnJhbWUoZGdfdHJhaW5fbWlzc2luZykKCmRnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkJGNsdXN0ZXIgPC0gZGJzY2FuMjAwJGNsdXN0ZXIKCmRnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkCmBgYAoKV2UgbmVlZCB0byBjaGVjayB0byBzZWUgaWYgd2UgY2FuIHBlcmZvcm0gaW1wdXRhdGlvbi4gSWYgYWxsIHRoZSB2YWx1ZXMgaW4gYSBjbHVzdGVyIGhhdmUgbi9hIHRoZW4gd2Ugd29udCBiZSBhYmxlIHRvIHBlcmZvcm0gdGhlIGltcHV0YXRpb24gYW5kIHRoZXJlZm9yZSBtYXkgbmVlZCB0byBjb25zaWRlciBjaGFuZ2luZyB0aGUgY2x1c3RlcmluZy4KCmBgYHtyfQpmb3IoaSBpbiAwOjE2KXsKICBhIDwtIGRnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkW2RnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkJGNsdXN0ZXIgPT0gaSxdCgogIGIgPC0gY29sU3Vtcyhpcy5uYShhKSkvbnJvdyhhKQogIAogIGlmKGJbImR1cmF0aW9uIl0gPT0gMSl7CiAgICBwcmludChwYXN0ZTAoIkNsdXN0ZXIgIiwgaSwgIiBoYXMgbm8gbm9uIG5hIHZhbHVlKHMpIikpCiAgfQp9CmBgYAoKV2Ugc2VlIGhlcmUgdGhhdCBhbGwgYnV0IDEgY2x1c3RlciBoYXMgdmFsdWVzIHRoYXQgYWxsb3cgdXMgdG8gaW1wdXRlLiBDbHVzdGVyIDQgaGFzIGFsbCBuL2EgdmFsdWVzIGFuZCB0aHVzIHdlIGNhbnQgdXNlIG1lYW4gaW1wdXRhdGlvbiB0byBmaWd1cmUgb3V0IHdoYXQgdGhlc2UgdmFsdWVzIHNob3VsZCBiZS4gV2UnbGwgY29uc2lkZXIgb3RoZXIgd2F5cyBvZiBpbXB1dGluZyBzb2xlbHkgZm9yIHRoaXMgY2x1c3RlciBhZnRlciB3ZSd2ZSBpbXB1dGVkIGZvciB0aGUgb3RoZXIgY2x1c3RlcnMuIE5vdGUgdGhhdCBub25lIG9mIHRoZSBvdGhlciB0ZXN0ZWQgY2x1c3RlcnMgcmVzdWx0IGluIGJldHRlciBvcHRpb25zLiBBbGwgdGhlIG90aGVyIGNsdXN0ZXJpbmcncyByZXN1bHQgaW4gbW9yZSBjbHVzdGVycyB3aXRoIG5vIHZhbHVlcyBlLmcuIGRic2NhbjQwMCBoYXMgMiBjbHVzdGVycyB3aXRoIGZ1bGwgbWlzc2luZ25lc3MgYW5kIGRic2NhbjMwIGhhcyAyMiBjbHVzdGVycyB3aXRoIGZ1bGwgbWlzc2luZ25lc3MuCgpgYGB7cn0KZm9yKGkgaW4gMDoxNil7CiAgYXNzaWduKHBhc3RlMCgiY2x1c3RlciIsaSksIGRnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkW2RnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkJGNsdXN0
ZXIgPT0gaSxdKQp9CmBgYAoKYGBge3J9CmNsdXN0ZXJzIDwtIGMoY2x1c3RlcjAsY2x1c3RlcjEsY2x1c3RlcjIsY2x1c3RlcjMsY2x1c3RlcjQsY2x1c3RlcjUsY2x1c3RlcjYsY2x1c3RlcjcsY2x1c3RlcjgsY2x1c3RlcjksY2x1c3RlcjEwLGNsdXN0ZXIxMSxjbHVzdGVyMTIsY2x1c3RlcjEzLGNsdXN0ZXIxNCxjbHVzdGVyMTUsY2x1c3RlcjE2KQpgYGAKCldlJ2xsIHBsb3QgdGhlIGZpcnN0IGNsdXN0ZXIgaW4gYSBib3ggcGxvdCB0byB2aXN1YWxpc2Ugb3V0bGllcnMgYW5kIGFsc28gYXMgYSBjb21wYXJpc29uIGZvciBsYXRlci4KYGBge3J9Cm1lbHREYXRhIDwtIG1lbHQoY2x1c3RlcjApCnAgPC0gZ2dwbG90KG1lbHREYXRhLCBhZXMoZmFjdG9yKHZhcmlhYmxlKSwgdmFsdWUpKSAKZ2d0aXRsZShjKCJDbHVzdGVyOiAwIikpCnAgKyBnZW9tX2JveHBsb3QoKSArIGZhY2V0X3dyYXAofnZhcmlhYmxlLCBzY2FsZT0iZnJlZSIpCmBgYAoKYGBge3J9CgpkbWVhbnMgPSBjKCkKb2JtZWFucyA9IGMoKQpyYm1lYW5zID0gYygpCgpmb3IoaSBpbiAxOjE3KXsKICBhIDwtIGFzLmRhdGEuZnJhbWUoYyhjbHVzdGVyc1s0KmktM10sY2x1c3RlcnNbNCppLTJdLGNsdXN0ZXJzWzQqaS0xXSxjbHVzdGVyc1s0KmldKSkKICBtIDwtIGNvbE1lYW5zKGEsbmEucm0gPSBUUlVFKQogIHByaW50KHBhc3RlMCgiQ3VycmVudGx5IHdvcmtpbmcgb24gY2x1c3RlciAiLGktMSwgIi4iKSkKICAKICBkbWVhbnMgPSBjKGRtZWFucyxtWzFdKQogIG9ibWVhbnMgPSBjKG9ibWVhbnMsbVsyXSkKICByYm1lYW5zID0gYyhyYm1lYW5zLG1bM10pCiAgCiAgZm9yKGsgaW4gKDE6MykpewogICAgZm9yKGogaW4gKDE6bnJvdyhhKSkpewogICAgICBpZihpcy5uYShhW2osa10pKXsKICAgICAgICBhW2osa10gPSBtW2tdCiAgICAgICAgCiAgICAgIH0KICAgIH0KICB9CiAgIGFzc2lnbihwYXN0ZTAoImNsdXN0ZXIiLGktMSksYSkKfQpgYGAKCldlJ2xsIGZpbmFsbHkgZ2V0IHRoZSB0YWJsZSBvZiBtZWFucyB0aGF0IHdlIHdhbnRlZC4gVGhpcyBnaXZlcyB1cyB0aGUgbWVhbiBvZiBlYWNoIG1pc3NpbmcgY29sdW1uIGFuZCB0aGUgY2x1c3RlciB0aGV5J3JlIGZyb20uCgpgYGB7cn0KbWVhbnMgPC0gZGF0YS5mcmFtZSgiY2x1c3RlciIgPSBzZXEoMCwxNiksICJkdXJhdGlvbiBtZWFucyI9IGRtZWFucywgIm9yaWdpbl9ieXRlcyBtZWFucyIgPSBvYm1lYW5zLCAicmVzcF9ieXRlcyBtZWFucyAiID0gcmJtZWFucykKYGBgCgpgYGB7cn0KcGRmKCJtZWFucy5wZGYiLCBoZWlnaHQ9MTEsIHdpZHRoPTEwKQpncmlkLnRhYmxlKG1lYW5zKQpkZXYub2ZmKCkKCm1lYW5zCmBgYAoKRmluYWxseSwgd2UnbGwgdGVzdCB0byBzZWUgaG93IHRoaXMgaW1wdXRhdGlvbiBoYXMgd29ya2VkLiBXZSdsbCBsb29rIGF0IHRoZSBlcnJvciBpLmUuIHRoZSBkaWZmZXJlbmNlIGJldHdlZW4gdGhlIG1lYW5zIHByb2R1Y2VkIGZyb20gdGhlIGNsdXN0ZXJlZCB0cmFpbmluZyBkYXRhIGFuZCB0
aGUgdHJhaW5pbmcgZGF0YSB3ZSdsbCBjbHVzdGVyIG5vdy4gV2UnbGwgdXNlIHRoZSBzYW1lIHBhcmFtZXRlcnMgYXMgZGVmaW5lZCBhYm92ZSB0byBtYWludGFpbiBjb25zaXN0ZW5jeSAtIGlmIHdlIHdlcmUgdG8gY2hlY2sgdGhlc2UgcGFyYW1ldGVycywgd2Ugc2hvdWxkIHNlZSBzaW1pbGFyIG9uZXMgc2luY2UgdGhleSBhcmUgYm90aCByYW5kb20gc2FtcGxlcyBvZiB0aGUgZGF0YS4KCmBgYHtyfQpkZ190ZXN0LnN2ZCA8LSBzdmQoZGdfdGVzdCkKYGBgCgpgYGB7cn0KaT0xO2o9MgpwbG90KGRnX3Rlc3Quc3ZkJHVbLGldLAogICAgIGRnX3Rlc3Quc3ZkJHVbLGpdLHR5cGU9InAiLAogICAgIGNvbD0iIzMzMzMzMzExIixwY2g9MTYsY2V4PTEpCmBgYAoKYGBge3J9CmRic2NhbjIwMFRlc3QgPSBkYnNjYW4oZGdfdGVzdC5zdmQkdVssMTpucGNzXSxlcHMgPSAwLjAwMDEsbWluUHRzID0gMjAwKQpgYGAKCmBgYHtyfQpkYnNjYW4yMDBUZXN0CmBgYAoKClNvIHdoYXQgd2UgZmluZCBpcyB0aGF0IHRoZSBjbHVzdGVyaW5nIGZvciB0aGUgdGVzdCBzcGxpdCBwdXRzIGFsbCAyMDAwMCBkYXRhIHBvaW50cyBpbnRvIHRoZSBmaXJzdCBjbHVzdGVyLiBXZSdsbCBoYXZlIGEgbG9vayBhdCB3aGF0IHJlc3VsdCB0aGlzIGdpdmVzIGJ1dCB0aGlzIHVsdGltYXRlbHkgbG9va3MgbGlrZSBpdCB3b250IHJlc3VsdCBpbiBhbnkgZnJ1aXRmdWwgY29tcGFyaXNvbiB0byBzZWUgaG93IHdlbGwgREJTQ0FOIHBlcmZvcm1lZC4KCmBgYHtyfQpkZ190ZXN0LmNsdXN0ZXJlZCA8LSBkYXRhLmZyYW1lKGRnX3Rlc3QpCgpkZ190ZXN0LmNsdXN0ZXJlZCRjbHVzdGVyIDwtIGRic2NhbjIwMFRlc3QkY2x1c3RlcgpgYGAKCmBgYHtyfQpkZ190ZXN0X21pc3NpbmcuY2x1c3RlcmVkIDwtIGRhdGEuZnJhbWUoZGdfdGVzdF9taXNzaW5nKQoKZGdfdGVzdF9taXNzaW5nLmNsdXN0ZXJlZCRjbHVzdGVyIDwtIGRic2NhbjIwMFRlc3QkY2x1c3RlcgoKZGdfdGVzdF9taXNzaW5nLmNsdXN0ZXJlZApgYGAKCmBgYHtyfQpjbHVzdGVyMHRlc3RtZWFucyA8LSBhcy5kYXRhLmZyYW1lKGNvbE1lYW5zKGRnX3Rlc3RfbWlzc2luZy5jbHVzdGVyZWQsIG5hLnJtID0gVFJVRSkpCgpjbHVzdGVyMHRlc3RtZWFucwoKYGBgCgpgYGB7cn0KY2x1c3RlcjB0cmFpbm1lYW5zIDwtIG1lYW5zWzEsXQpgYGAKCgpgYGB7cn0KZGlmZm1lYW5zID0gYygpCgpmb3IoaSBpbiAyOjQpewogIHRyYWlubSA8LSBjbHVzdGVyMHRyYWlubWVhbnNbaV0KICB0ZXN0bSA8LSBjbHVzdGVyMHRlc3RtZWFuc1tpLTEsXQogIGRpZmYgPC0gMSAtICh0ZXN0bS90cmFpbm0pCiAgZGlmZm1lYW5zIDwtIGMoZGlmZm1lYW5zLCBkaWZmKQp9CmBgYAoKYGBge3J9CmFzLmRhdGEuZnJhbWUoZGlmZm1lYW5zKQpgYGAKClRodXMgd2UgaGF2ZSBhIHZlcnkgbGFyZ2UgZGlmZmVyZW5jZSBpbiB0aGUgbWVhbnMgb2Ygb3VyIHRyYWluaW5nIGRhdGEgYW5kIHRoZSBtZWFucyBvZiB0ZXN0IGRhdGEgYW5kIHRodXMgd2UgbWF5
IGFzc3VtZSB0aGF0IERCU0NBTiBpbiB0aGlzIGNhc2UgZG9lc24ndCBwZXJmb3JtIHZlcnkgd2VsbC4KCmBgYHtyfQp3cml0ZS5jc3YobWVhbnMsIm1hdHRtZWFucy5jc3YiKQpgYGAKClRoZSBmaW5hbCBzdGVwIGlzIHRvIGltcHV0ZSB0aGUgZGF0YSBpbnRvIHRoZSBvcmlnaW5hbCBkYXRhIGZyYW1lLiBUaGUgYmVsb3cgZnVuY3Rpb24gaXMgKipWRVJZKiogc2xvdyAoSSdtIG5vdCBzdXJlIGhvdyB0byBvcHRpbWlzZSBpdCkgYnV0IGl0IGRvZXMgZ2V0IHRoZSBjb3JyZWN0IG91dHB1dC4KCmBgYHtyfQpmb3IoaSBpbiAxOm5yb3coZGdfdHJhaW5fbWlzc2luZy5jbHVzdGVyZWQpKXsKICBjbHVzdGVyIDwtIGRnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkW2ksNF0KICBmb3IoaiBpbiAxOjQpewogICAgZG1lYW4gPC0gIG1lYW5zW2NsdXN0ZXIrMSwyXQogICAgb2JtZWFuIDwtIG1lYW5zW2NsdXN0ZXIrMSwzXQogICAgcmJtZWFuIDwtIG1lYW5zW2NsdXN0ZXIrMSw0XQogICAgCiAgICBkZ190cmFpbl9taXNzaW5nLmNsdXN0ZXJlZFtpLDFdIDwtIGRtZWFuCiAgICBkZ190cmFpbl9taXNzaW5nLmNsdXN0ZXJlZFtpLDJdIDwtIG9ibWVhbgogICAgZGdfdHJhaW5fbWlzc2luZy5jbHVzdGVyZWRbaSwzXSA8LSByYm1lYW4KICB9Cn0KYGBgCgpgYGB7cn0KZGdfdHJhaW4kZHVyYXRpb24gPC0gZGdfdHJhaW5fbWlzc2luZy5jbHVzdGVyZWQkZHVyYXRpb24KZGdfdHJhaW4kb3JpZ19ieXRlcyA8LSBkZ190cmFpbl9taXNzaW5nLmNsdXN0ZXJlZCRvcmlnX2J5dGVzCmRnX3RyYWluJHJlc3BfYnl0ZXMgPC0gZGdfdHJhaW5fbWlzc2luZy5jbHVzdGVyZWQkcmVzcF9ieXRlcwpgYGAKCmBgYHtyfQpkZ190cmFpbgpgYGAKCkZvciByZWZlcmVuY2UsIHRoZSBoZWFkZXIgb2YgQWxleCdzIGRhdGEgKHdoaWNoIG1hdGNoZXMgdGhlIGRhdGEgYWJvdmUpIGlzIGdpdmVuIGJlbG93LgoKYGBge3J9CmltIDwtIGxvYWQuaW1hZ2UoIkFsZXhNZWFucy5wbmciKQpwbG90KGltKQpgYGAKCgpGaW5hbGx5LCB3ZSdyZSBnb2luZyB0byB2aXN1YWxpc2UgdGhlIGNsdXN0ZXJpbmcgdXNpbmcgdC1TTkUgcHJvamVjdGlvbi4gVGhlIG1haW4gYWltIG9mIHRoZSBwcm9qZWN0IHdhcyB0byBwZXJmb3JtIGltcHV0YXRpb24gYnV0IGJlaW5nIGFibGUgdG8gYWN0dWFsbHkgdmlzdWFsaXNlIHRoZSBjbHVzdGVycyBpcyBpbXBvcnRhbnQgdG9vLiBUaGUgcGxvdHMgYWJvdmUgaGVscCB1cyB1bmRlcnN0YW5kIHRoZSBkYXRhIGJ1dCBhcmUgaGFyZCB0byBpbmZlciBhbnl0aGluZyBmcm9tLiBXZSdsbCB2aXN1YWxpc2UgdGhlIERCU0NBTjIwMCBkYXRhIGJlbG93LgoKYGBge3J9CnJ0c25lX291dCA8LSBSdHNuZShhcy5tYXRyaXgoZGdfdHJhaW4uY2x1c3RlcmVkKSwgcGNhID0gRkFMU0UsIHZlcmJvc2UgPSBUUlVFLCBjaGVja19kdXBsaWNhdGVzID0gRkFMU0UpCmBgYAoKYGBge3J9CnBsb3QocnRzbmVfb3V0JFksIGFzcCA9IDEsIHBjaCA9IDIwLCAKICAgICBjZXggPSAwLjEs
IGNleC5heGlzID0gMS4yNSwgY2V4LmxhYiA9IDEuMjUsIGNleC5tYWluID0gMS41LCAKICAgICB4bGFiID0gInQtU05FIGRpbWVuc2lvbiAxIiwgeWxhYiA9ICJ0LVNORSBkaW1lbnNpb24gMiIsIAogICAgIG1haW4gPSAiMkQgdC1TTkUgcHJvamVjdGlvbiIsY29sPWMoIiM2NjY2NjY2NiIscmFpbmJvdyg0MSkpW2Ric2NhbjIwMCRjbHVzdGVyKzFdKQpgYGAKCgpXZSdsbCBhbHNvIGxvb2sgYXQgYSBwbG90IHVzaW5nIHVtYXAuCgpgYGB7cn0KZGF0YS51bWFwIDwtIHVtYXAoZGdfdHJhaW4uY2x1c3RlcmVkLCBpbml0PSJzcGVjdHJhbCIpCmBgYAoKYGBge3J9CnBsb3QoZGF0YS51bWFwLCBhc3AgPSAxLCBwY2ggPSAyMCwgCiAgICAgY2V4ID0gMC4yLCBjZXguYXhpcyA9IDEuMjUsIGNleC5sYWIgPSAxLjI1LCBjZXgubWFpbiA9IDEuNSwgCiAgICAgbWFpbiA9ICIyRCB1bWFwIHByb2plY3Rpb24iLGNvbD1jKCIjNjY2NjY2NjYiLHJhaW5ib3coNDEpKVtkYnNjYW4yMDAkY2x1c3RlcisxXSkKYGBgCgpUaGUgZGlmZmVyZW5jZSBpcyBzdGFydGxpbmcuIFdoZXJlYXMgdGhlIHRzbmUgcGxvdCBsb29rcyBmYWlybHkganVtYmxlZCB3aXRoIGNsdXN0ZXJzLCB3aXRoIG5vIGNsdXN0ZXJzIGFjdHVhbGx5IHNlZW1pbmcgdG8gYXBwZWFyIGFuZCBtb3JlIHNjYXR0ZXJpbmcgd2l0aGluIGl0LCB0aGUgdW1hcCBwbG90IGhhcyB2ZXJ5IGRpc2NyZXRlIGNsdXN0ZXJzIGFuZCBnaXZlcyBhIG11Y2ggYmV0dGVyIHZpc3VhbGlzYXRpb24uIFdlIGdldCBzb21lIHNjYXR0ZXJpbmcgYmV0d2VlbiBjbHVzdGVycyB3aXRoIGdyZXkvcmVkIHBvaW50cyBvY2Nhc2lvbmFsbHkgc2hvd2luZyB1cCB3aGVyZSB3ZSBkb24ndCBuZWNlc3NhcmlseSBleHBlY3QgdGhlbSBidXQgb3ZlcmFsbCB0aGUgY2x1c3RlcnMgbG9vayB2ZXJ5IGluZGVwZW5kZW50LiBXaXRoIHRoaXMgaW4gbWluZCwgSSB3b3VsZCBwcmVzdW1lIHRoYXQgdGhlIGNsdXN0ZXJpbmcgd2l0aCBhIG1pbmltdW0gcG9pbnRzIG9mIDIwMCBkb2VzIHByb2R1Y2UgdmFsaWQgY2x1c3RlcnMgYW5kIGlzIGEgZ29vZCB3YXkgdG8gcGVyZm9ybSBpbXB1dGF0aW9uIGJhc2VkIG9uIGNsdXN0ZXJzLCBkZXNwaXRlIHNvbWUgb2YgdGhlIGVhcmxpZXIgaXNzdWVzIHRoYXQgbWF5IHN0aWxsIGJlIHZhbGlkLiBBZGRpdGlvbmFsbHksIHRoZSB1bWFwIHByb2plY3Rpb24gaXMgaW5jcmVkaWJseSBmYXN0IGNvbXBhcmVkIHRvIHRoZSB0c25lIHByb2plY3Rpb24gYW5kIHRoZXJlZm9yZSBpcyBjb21wdXRhdGlvbmFsbHkgbW9yZSB1c2VmdWwuCgpUbyBmaW5pc2gsIHdlJ2xsIGxvb2sgYXQgQWxleCdzIGNsdXN0ZXJpbmcgY29tcGFyZWQgdG8gb3Vycy4KCmBgYHtyfQppbSA8LSBsb2FkLmltYWdlKCJCdXNfU2VhdC5wbmciKQpwbG90KGltKQpgYGAKCkFsZXgncyBjbHVzdGVyaW5nIGRvZXNuJ3QgaGF2ZSB0aGUgbGFyZ2UgbWFqb3JpdHkgY2x1c3RlciB0aGF0IG91cnMgcHJvZHVjZXMgYW5kIG1heSBiZSB0aGUgcmVh
c29uIHRoYXQgaGlzIHJlc3VsdHMgYXJlIG1vcmUgYWNjdXJhdGUuIFRoZSBpbml0aWFsaXNhdGlvbiBvZiByYW5kb20gbWVhbnMgYW5kIHRoZXJlZm9yZSByYW5kb20gY2x1c3RlcnMgbWF5IGFsbG93IHRoZSBrLW1lYW5zIGFsZ29yaXRobSB0bywgaW4gdGhpcyBjYXNlLCB3b3JrIGJldHRlciB0aGFuIHRoZSBEQlNDQU4gYWxnb3JpdGhtLiBJdCBpcyBhbHNvIHdvcnRod2hpbGUgbm90aW5nIHRoYXQgQWxleCBkb2VuJ3QgZ2V0IGEgY2x1c3RlciBmdWxsIG9mIE5BJ3MgYXMgd2UgZG8uCgpSZWZlcmVuY2VzOgoKMS4gW0RhdGEgZnJvbSBTZWNSZXBvXShodHRwczovL3d3dy5zZWNyZXBvLmNvbSkKCjIuIFtDb252ZXJ0aW5nIGNhdGVnb3JpY2FsIHZhcmlhYmxlc10oaHR0cHM6Ly9zdGFja292ZXJmbG93LmNvbS9xdWVzdGlvbnMvNDc5MjIxODQvY29udmVydC1jYXRlZ29yaWNhbC12YXJpYWJsZXMtdG8tbnVtZXJpYy1pbi1yLzQ3OTIzMTc4KQoKMy4gW0FkZGluZyBjb2x1bW5zIHRvIGRhdGEgZnJhbWVzXShodHRwczovL2Rpc2N1c3MuYW5hbHl0aWNzdmlkaHlhLmNvbS90L2hvdy10by1hZGQtYS1jb2x1bW4tdG8tYS1kYXRhLWZyYW1lLWluLXIvMzI3OCkKCjQuIFtGaW5kaW5nIFVuaXF1ZSBWYWx1ZXNdKGh0dHBzOi8vc3RhY2tvdmVyZmxvdy5jb20vcXVlc3Rpb25zLzQxOTA2ODc4L3ItbnVtYmVyLW9mLXVuaXF1ZS12YWx1ZXMtaW4tYS1jb2x1bW4tb2YtZGF0YS1mcmFtZSkKCjUuIFtEQlNDQU4gb24gZmxvd2Vyc10oaHR0cHM6Ly93d3cuZ2Vla3Nmb3JnZWVrcy5vcmcvZGJzY2FuLWNsdXN0ZXJpbmctaW4tci1wcm9ncmFtbWluZy8pCgo2LiBbU2F2aW5nIFBsb3RzIChjcmVkaXQgbXVzdCBhbHNvIGJlIGdpdmVuIHRvIEFsZXggZm9yIGhlbHBpbmcgbWUgb3V0IGEgaHVnZSBhbW91bnQgaGVyZSldKGh0dHA6Ly93d3cuc3RoZGEuY29tL2VuZ2xpc2gvd2lraS9jcmVhdGluZy1hbmQtc2F2aW5nLWdyYXBocy1yLWJhc2UtZ3JhcGhzKQoKNy4gW0RCU0NBTiBQYXJhbWV0ZXIgRXN0aW1hdGlvbl0oaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvREJTQ0FOI1BhcmFtZXRlcl9lc3RpbWF0aW9uKQoKOC4gW0ZpbmRpbmcgdGhlIGtuZWUgaW4ga05ORGlzdF0oaHR0cHM6Ly93d3cucmRvY3VtZW50YXRpb24ub3JnL3BhY2thZ2VzL2Ric2Nhbi92ZXJzaW9ucy8xLjEtNS90b3BpY3Mva05OZGlzdCkKCjkuIFtTaWxob3VldHRlIFNjb3JlIGludHJvZHVjdGlvbl0oaHR0cHM6Ly9tZWRpdW0uY29tL2NvZGVzbWFydC9yLXNlcmllcy1rLW1lYW5zLWNsdXN0ZXJpbmctc2lsaG91ZXR0ZS03OTQ3NzRiNDY1ODYpCgoxMC4gW0Vycm9yIHdpdGggc2lsaG91ZXR0ZSBzY29yZV0oaHR0cHM6Ly9zdGFja292ZXJmbG93LmNvbS9xdWVzdGlvbnMvNTEyNDgyOTMvZXJyb3ItdmVjdG9yLW1lbW9yeS1leGhhdXN0ZWQtbGltaXQtcmVhY2hlZC1yLTMtNS0wLW1hY29zKQoKMTEuIFtTaWxob3VldHRlIEZ1bmN0aW9uXShodHRwczovL3d3dy5yZG9jdW1lbnRhdGlv
bi5vcmcvcGFja2FnZXMvY2x1c3Rlci92ZXJzaW9ucy8yLjEuMC90b3BpY3Mvc2lsaG91ZXR0ZSkKCjEyLiBbQXNzaWduIGZ1bmN0aW9uIGZvciBjcmVhdGluZyBtdWx0aXBsZSBkYXRhIGZyYW1lcyBhdCBvbmNlXShodHRwczovL3N0YWNrb3ZlcmZsb3cuY29tL3F1ZXN0aW9ucy80NDU3NTExMC9mb3ItbG9vcC1mb3ItY3JlYXRpbmctbXVsdGlwbGUtZGF0YS1mcmFtZXMtYW5kLWFzc2lnbmluZy12YWx1ZXMpCgoxMy4gW0V4cG9ydGluZyBhIGRhdGEgZnJhbWUgYXMgYSBwZGZdKGh0dHBzOi8vc3RhY2tvdmVyZmxvdy5jb20vcXVlc3Rpb25zLzQyODYwNzE2L2V4cG9ydC1kYXRhZnJhbWUtdG8tcGRmLXBuZy1pbi1yKQoKMTQuIFtQbG90dGluZyBtdWx0aXBsZSBib3ggcGxvdHMgdXNpbmcgZ2dwcGxvdF0oaHR0cHM6Ly9zdGFja292ZXJmbG93LmNvbS9xdWVzdGlvbnMvMTEzNDY4ODAvci1wbG90LW11bHRpcGxlLWJveC1wbG90cy11c2luZy1jb2x1bW5zLWZyb20tZGF0YS1mcmFtZSkKCjE1LiBbVXNpbmcgdGhlIHV3b3QgcGFja2FnZV0oaHR0cHM6Ly93d3cucmRvY3VtZW50YXRpb24ub3JnL3BhY2thZ2VzL3V3b3QvdmVyc2lvbnMvMC4wLjAuOTAwOSk=